DeepMoA: method to predict the mechanism of action of cancer drugs

Select data and import libraries

Code
import sys # we require code from other folders
import pandas as pd
import numpy as np
import itertools
import pickle
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
Code
import seaborn as sns
import matplotlib.pyplot as plt
CB_color_cycle = ['#EECC16', '#62BB35', '#FDAE33','#208EA3', '#EA4E9D', '#984ea3','#999999', '#e41a1c', '#dede00']
#sns.set_style("darkgrid")
Code
import matplotlib.font_manager as fm
font_files = fm.findSystemFonts()

plt.rcdefaults()
# Go through and add each to Matplotlib's font cache.
for font_file in font_files:
    fm.fontManager.addfont(font_file)
plt.rc('font', family='Roboto')
Code
plt.rc('font', family='Roboto')

plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = 'Roboto'
Code
#%config InlineBackend.figure_format='retina'
Code
# PyTorch-related imports
import torch
import torch.nn as nn
import torch.optim as optim

# imports from captum library
from captum.attr import LayerDeepLift
Code
# for combobox
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
Code
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
Code
pd.options.display.min_rows = 20000
pd.set_option('max_colwidth', 200)
Code
pd.options.display.max_rows = 20000
Code
pd.set_option('min_rows', 20000)
Code
mac = "/Users/katyna/Library/CloudStorage/OneDrive-Tecnun/"
windows = "C:/Users/ksada/OneDrive - Tecnun/"
computer = windows # CHANGE
Code
sys.path.append(computer + "SparseGO_code/code")
import util
from util import *
Code
%matplotlib inline
Code
#%matplotlib inline

# To make histograms
def histogram(dataframe, color, title, ylabel,n_bins, threshold=0.05):
    """Plot a histogram on the current axes and highlight the low-value bins.

    Parameters
    ----------
    dataframe : data forwarded unchanged to ``plt.hist`` (the x axis is
        labelled "P-value").
    color : face color used for the regular bars.
    title : plot title.
    ylabel : label of the y axis.
    n_bins : value forwarded as ``bins`` to ``plt.hist``.
    threshold : bars whose left edge is below this value are recolored with
        ``CB_color_cycle[2]``. Defaults to 0.05.
    """
    _, bins, patches = plt.hist(dataframe, color=color, bins=n_bins, linewidth=0.1)

    # `bins` holds one more edge than there are bars, so zip() pairs every
    # bar with its left edge and stops after the last bar.
    for left_edge, patch in zip(bins, patches):
        if left_edge < threshold:
            patch.set_facecolor(CB_color_cycle[2])

    plt.xlabel("P-value", fontsize=16)
    plt.ylabel(ylabel, fontsize=16)
    plt.title(title, fontsize=16)
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)

    # Style the axes that were just drawn on. plt.gca() returns them as-is,
    # whereas plt.subplot(111) may create a fresh axes if the current figure
    # uses a different layout.
    ax = plt.gca()
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
Code
inputdir = computer+"SparseGO_code/data/cross_validation_expression/allsamples/" # CHANGE
dir1=computer+"Tesis/Codigo/VariableImportance/"
dir2=computer+"SparseGO_code/results/weights&biases/Expression_MSE_all/" # CHANGE
resultsdir=dir2
Code
gene2id = inputdir+"gene2ind.txt"
cell2id=inputdir+"cell2ind.txt"
drug2id=inputdir+"drug2ind.txt"
drug2fingerprint=inputdir+"drug2fingerprint.txt"
load=resultsdir+"last_model.pt"

onto = inputdir+"ontology.txt"  # CHANGE 
genotype=inputdir+"cell2expression.txt"  # CHANGE 

num_neurons_per_GO = 6 # CHANGE

DeepLIFT

Code
# Load the ontology and the input features.
# NOTE(review): load_mapping, load_ontology, sort_pairs and pairs_in_layers are
# not defined in this notebook; presumably they come from `from util import *`.
gene2id_mapping = load_mapping(gene2id)
dG, terms_pairs, genes_terms_pairs = load_ontology(onto, gene2id_mapping)
sorted_pairs, level_list, level_number = sort_pairs(genes_terms_pairs, terms_pairs, dG, gene2id_mapping)
# layer_connections is indexed by layer number further down in the notebook
layer_connections = pairs_in_layers(sorted_pairs, level_list, level_number) 

# Comma-separated numeric matrices; rows are later indexed by cell / drug index
cell_features = np.genfromtxt(genotype, delimiter=',')
drug_features = np.genfromtxt(drug2fingerprint, delimiter=',')

drug2id_mapping = load_mapping(drug2id)
cell2id_mapping = load_mapping(cell2id)

num_genes = len(gene2id_mapping)
drug_dim = len(drug_features[0,:]) # number of columns of the drug feature matrix
There are 15015 genes
There are 1 roots: GO:0008150
There are 4184 terms
There are 1 connected components
Code
# Load the saved model (resultsdir + "last_model.pt") and map its tensors to GPU 0.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
model = torch.load(load, map_location='cuda:%d' % 0)
Code
model
sparseGO_nn(
  (genes_terms_sparse_linear_1): SparseLinearNew(
    in_features=15015, out_features=25104, bias=True, sparsity=0.0030196221878822263, connectivity=tensor([[    0,     1,     2,  ..., 23721, 23722, 23723],
            [    0,     0,     0,  ..., 15014, 15014, 15014]], device='cuda:0'), small_world=False
  )
  (genes_terms_tanh): Tanh()
  (genes_terms_batchnorm): BatchNorm1d(25104, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (GO_terms_sparse_linear_1): SparseLinearNew(
    in_features=25104, out_features=8304, bias=True, sparsity=0.002372788160788691, connectivity=tensor([[  966,   967,   968,  ...,  7047,  7048,  7049],
            [    0,     0,     0,  ..., 25103, 25103, 25103]], device='cuda:0'), small_world=False
  )
  (GO_terms_tanh_1): Tanh()
  (GO_terms_batchnorm_1): BatchNorm1d(8304, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (GO_terms_sparse_linear_2): SparseLinearNew(
    in_features=8304, out_features=3684, bias=True, sparsity=0.003911619061964564, connectivity=tensor([[   0,    1,    2,  ..., 3681, 3682, 3683],
            [   0,    0,    0,  ..., 8303, 8303, 8303]], device='cuda:0'), small_world=False
  )
  (GO_terms_tanh_2): Tanh()
  (GO_terms_batchnorm_2): BatchNorm1d(3684, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (GO_terms_sparse_linear_3): SparseLinearNew(
    in_features=3684, out_features=1650, bias=True, sparsity=0.007924193070772875, connectivity=tensor([[ 150,  151,  152,  ..., 1641, 1642, 1643],
            [   0,    0,    0,  ..., 3683, 3683, 3683]], device='cuda:0'), small_world=False
  )
  (GO_terms_tanh_3): Tanh()
  (GO_terms_batchnorm_3): BatchNorm1d(1650, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (GO_terms_sparse_linear_4): SparseLinearNew(
    in_features=1650, out_features=726, bias=True, sparsity=0.015807663410969196, connectivity=tensor([[ 474,  475,  476,  ...,  711,  712,  713],
            [   0,    0,    0,  ..., 1649, 1649, 1649]], device='cuda:0'), small_world=False
  )
  (GO_terms_tanh_4): Tanh()
  (GO_terms_batchnorm_4): BatchNorm1d(726, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (GO_terms_sparse_linear_5): SparseLinearNew(
    in_features=726, out_features=318, bias=True, sparsity=0.03305785123966942, connectivity=tensor([[ 60,  61,  62,  ..., 105, 106, 107],
            [  0,   0,   0,  ..., 725, 725, 725]], device='cuda:0'), small_world=False
  )
  (GO_terms_tanh_5): Tanh()
  (GO_terms_batchnorm_5): BatchNorm1d(318, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (GO_terms_sparse_linear_6): SparseLinearNew(
    in_features=318, out_features=120, bias=True, sparsity=0.06981132075471698, connectivity=tensor([[  0,   1,   2,  ...,  93,  94,  95],
            [  0,   0,   0,  ..., 317, 317, 317]], device='cuda:0'), small_world=False
  )
  (GO_terms_tanh_6): Tanh()
  (GO_terms_batchnorm_6): BatchNorm1d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (GO_terms_sparse_linear_7): SparseLinearNew(
    in_features=120, out_features=42, bias=True, sparsity=0.2, connectivity=tensor([[ 18,  19,  20,  ...,  21,  22,  23],
            [  0,   0,   0,  ..., 119, 119, 119]], device='cuda:0'), small_world=False
  )
  (GO_terms_tanh_7): Tanh()
  (GO_terms_batchnorm_7): BatchNorm1d(42, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (GO_terms_sparse_linear_8): SparseLinearNew(
    in_features=42, out_features=30, bias=True, sparsity=1.0, connectivity=tensor([[ 0,  1,  2,  ..., 27, 28, 29],
            [ 0,  0,  0,  ..., 41, 41, 41]], device='cuda:0'), small_world=False
  )
  (GO_terms_tanh_8): Tanh()
  (GO_terms_batchnorm_8): BatchNorm1d(30, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (drug_linear_layer_1): Linear(in_features=2048, out_features=200, bias=True)
  (drug_tanh_1): Tanh()
  (drug_batchnorm_layer_1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (drug_linear_layer_2): Linear(in_features=200, out_features=100, bias=True)
  (drug_tanh_2): Tanh()
  (drug_batchnorm_layer_2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (drug_linear_layer_3): Linear(in_features=100, out_features=50, bias=True)
  (drug_tanh_3): Tanh()
  (drug_batchnorm_layer_3): BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (final_linear_layer): Linear(in_features=80, out_features=40, bias=True)
  (final_tanh): Tanh()
  (final_batchnorm_layer): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (final_aux_linear_layer): Linear(in_features=40, out_features=1, bias=True)
  (final_aux_tanh): Tanh()
  (final_linear_layer_output): Linear(in_features=1, out_features=1, bias=True)
)
Code
# Layers whose attributions are analysed, in network order:
# the gene -> term layer first, then GO-term sparse layers 1 to 7.
model_layers = [model.genes_terms_sparse_linear_1] + [
    getattr(model, "GO_terms_sparse_linear_" + str(depth)) for depth in range(1, 8)
]

GO terms info

Code
# Go term names
gene_ontology = pd.read_excel('all_go_terms_info.xlsx')

Get all layers’ GO term with the neuron number

Code
all_terms_ids = {}
all_terms_names = {}
all_layers_non_virtual = {} # only the terms that really belong to the layer (virtual ones removed); those are the important attributions
all_layers_non_virtual_names = {}
num_neurons_per_GO = 6


def _go_term_name(term):
    """Return the capitalized second-column value of the `gene_ontology` row whose 'id' equals `term`.

    Raises IndexError when `term` is not present in `gene_ontology`.
    """
    return gene_ontology.loc[gene_ontology['id'] == term].to_numpy()[0,1].capitalize()


for layer_number in range(len(layer_connections)-1):
    layer_pairs = layer_connections[layer_number]

    terms_ids = []
    names = []
    # Original note: the first 6 neurons correspond to the term with key 0
    output_id = create_index(layer_pairs[:,0])

    for term in output_id.keys():
        name = _go_term_name(term)
        # One label per neuron of the term (GO:0000038_1, GO:0000038_2, ...),
        # so the vector can later be concatenated with the attributions
        for i in range(1,num_neurons_per_GO+1):
            terms_ids.append(term+"_"+str(i))
            names.append(name+" ("+str(i)+")")
    all_terms_ids[layer_number] = np.array(terms_ids)
    all_terms_names[layer_number] = np.array(names)

    non_virtual = [] # store the terms that are part of this layer
    non_virtual_names = []
    for term in level_list[layer_number+1]:
        nv_name = _go_term_name(term)
        # Same neuron count as above (was a hard-coded range(1,7)), so both
        # label sets stay in sync if num_neurons_per_GO changes
        for i in range(1,num_neurons_per_GO+1):
            non_virtual.append(term+"_"+str(i))
            non_virtual_names.append(nv_name+" ("+str(i)+")")
    all_layers_non_virtual[layer_number] = non_virtual
    all_layers_non_virtual_names[layer_number] = non_virtual_names

All GO terms that belong to a layer (non-virtual), with their corresponding name and layer number…

Code
# One frame per layer, concatenated in a single call. The empty seed frame is
# kept as the first element so the columns resolve exactly as they would when
# appending layer by layer.
layer_frames = [pd.DataFrame({"GO_term":[],"Name":[],"layer_number":[]})]
for layer_number in range(len(layer_connections)-1):
    layer_frames.append(
        pd.DataFrame({
            "GO_term": all_layers_non_virtual[layer_number],
            "Name": all_layers_non_virtual_names[layer_number],
            "layer_number": (layer_number),
        })
    )
real_go_info = pd.concat(layer_frames)
real_go_info.head()
GO_term Name layer_number
0 GO:0000019_1 Regulation of mitotic recombination (1) 0.0
1 GO:0000019_2 Regulation of mitotic recombination (2) 0.0
2 GO:0000019_3 Regulation of mitotic recombination (3) 0.0
3 GO:0000019_4 Regulation of mitotic recombination (4) 0.0
4 GO:0000019_5 Regulation of mitotic recombination (5) 0.0

Drugs info

Code
def get_compound_names(file_name):
    """Read a tab-separated file and return its second and third columns.

    Every non-blank line is stripped of surrounding whitespace and split on
    tabs; the result holds one ``[second_field, third_field]`` list per line,
    in file order (a header line, if present, is returned like any other).
    Blank lines (for example a trailing empty line) are skipped.

    Raises IndexError when a non-blank line has fewer than three fields.
    """
    compounds = []

    with open(file_name, 'r') as fi:
        for line in fi:
            stripped = line.strip()
            if not stripped:
                # an empty line has no fields to read
                continue
            tokens = stripped.split('\t')
            compounds.append([tokens[1],tokens[2]])
    return compounds
Code
drugs = get_compound_names(inputdir+"compound_names.txt")
drugs.pop(0)
['SMILE', 'Name']

DeepLIFT for VNN

Reference activation… (baseline)

Code
# Reference (baseline) inputs for DeepLIFT.
median_cell_features = np.median(cell_features,axis=0) # column-wise median of cell_features, used as the reference
# NOTE: despite its name this is not a median; it is the fingerprint read from glucose_fingerprint.txt
median_drug_features = np.genfromtxt(computer+"SparseGO_code/data/glucose_fingerprint.txt", delimiter=',')

Attribution function: sum

Code
def get_layer_attribution(layer_number,input_data,baseline,selected_drug_data):
    """Return the summed DeepLIFT attributions of one layer for one drug.

    Runs captum's LayerDeepLift on ``model_layers[layer_number]`` with
    `input_data` against `baseline`, sums the attributions along axis 0
    (one row per input sample), labels the values with
    ``all_terms_ids[layer_number]`` and keeps only the rows listed in
    ``all_layers_non_virtual[layer_number]``.

    Returns a DataFrame with a "GO_term" column and a column named
    ``selected_drug_data[1]`` holding the sums rounded to 10 decimals.
    """
    drug_column = selected_drug_data[1]
    studied_layer = model_layers[layer_number] # layer under study

    explainer = LayerDeepLift(model, studied_layer, multiply_by_inputs=True)
    per_sample_attr = explainer.attribute(input_data, baseline)
    # attributions are added up over all samples
    summed_attr = per_sample_attr.cpu().detach().numpy().sum(axis=0)

    # Stacking string ids with numbers gives a string array, hence the
    # conversion back to numeric right after building the frame
    labelled = np.column_stack((all_terms_ids[layer_number], summed_attr))
    attribution_data = pd.DataFrame(labelled, columns=["GO_term", drug_column])
    attribution_data[[drug_column]] = attribution_data[[drug_column]].apply(pd.to_numeric).round(10)

    # keep only the non-virtual terms
    is_non_virtual = attribution_data['GO_term'].isin(all_layers_non_virtual[layer_number])
    return attribution_data.loc[is_non_virtual]

DeepLIFT for all drugs

Code
# Obtain the attributions of the GO terms on all layers for each drug.

# The baseline (median expression data + reference drug fingerprint) does not
# depend on the drug, so it is built and moved to the GPU once.
# (Using the drug's own fingerprint as drug baseline instead would require
# rebuilding it inside the loop.)
baseline = torch.FloatTensor(np.concatenate((median_cell_features, median_drug_features), axis=None))
baseline = torch.reshape(baseline, (1, baseline.size()[0]))
baseline = baseline.cuda(0)

# Cell rows paired with every drug; also constant across drugs
n_cells = len(cell2id_mapping)
cell_block = cell_features[:n_cells]

drug_columns = [] # one attribution column per drug, joined once after the loop
attribution_data_drug = None
for selected_drug_data in drugs:
    selected_drug = selected_drug_data[0] # drug SMILES
    drug_specific_features = drug_features[drug2id_mapping[selected_drug]] # features of the drug

    # All combinations of the selected drug with every cell:
    # each row is [cell features, drug features]
    selected_drug_features = np.hstack((cell_block, np.tile(drug_specific_features, (n_cells, 1))))
    selected_drug_features = torch.FloatTensor(selected_drug_features)

    # Data for DeepLIFT
    input_data = selected_drug_features.cuda(0)

    # Attribution of every analysed layer, stacked into one frame
    attribution_data_drug = pd.concat([
        get_layer_attribution(layer_number, input_data, baseline, selected_drug_data)
        for layer_number in range(len(model_layers))
    ])
    drug_columns.append(attribution_data_drug.iloc[:,1])

    print(selected_drug_data[1])

if drug_columns:
    # GO_term labels (identical for every drug) followed by one column per drug
    attribution_data_all = pd.concat([attribution_data_drug.iloc[:,0]] + drug_columns, axis=1)
else:
    # no drugs: keep an empty frame instead of failing on an undefined name
    attribution_data_all = pd.DataFrame()
BRD-K02251932-001-01-3
BRD-K25737009-001-01-2
Nintedanib
bicalutamide
N-[(2R,3S)-2-[[cyclopropylmethyl(methyl)amino]methyl]-5-[(2R)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2H-1,5-benzoxazocin-8-yl]-1-methyl-4-imidazolesulfonamide
PHA-665752
N-cyclopropyl-3-[3-[[cyclopropyl(oxo)methyl]amino]-1H-indazol-6-yl]benzamide
Ki8751
IPA-3
FAWUGYGEBHAQBU-PPEXNQRJSA-N
retinol + SCHEMBL2671349
BRD-K05870596-001-01-4
Onalespib
Cediranib
Vemurafenib + Erlotinib
ZINC113660258
NSC60043
WPTTVJLTNAWYAO-CDYPJPISSA-N
brefeldin A
AZD7762
SCHEMBL2139153
staurosporine
HMS1361J12
N-[(2R,3S)-5-[(2R)-1-hydroxypropan-2-yl]-3-methyl-2-(methylaminomethyl)-6-oxo-2,3,4,7-tetrahydro-1,5-benzoxazonin-9-yl]-3-(4-morpholinyl)propanamide
FGIN-1-27
BRD-K30019337-001-01-1
NVP-TAE684
apicidin
Purmorphamine
Decitabine
Tivozanib
MK-2206
MKWLQYDUWJBEKU-LWSJDIAFSA-N
16beta-Bromoandrosterone
BRD-K16147474-001-01-1
Palbociclib
thapsigargin
SCHEMBL13833463
SNS-032
OSU-03012
Necrostatin-1
BI 2536
BRD-K53792571-003-01-6
JQ1 + SCHEMBL2671349
MNULEGDCPYONBU-PAMDCEDJSA-N
gemcitabine
LY2183240
CHEMBL436817
Serdemetan
UNC-0638 + SCHEMBL2671349
Ixazomib
Panobinostat
cimetidine
Mirdametinib
tacrolimus
BRD-K52037352-001-01-6
CP-724714
Dinaciclib
GDC-0879
TW-37
Fedratinib
retinol + Navitoclax
Palmostatin B
RSK inhibitor Fmk
N-[(2S,3S)-2-[[[(cyclohexylamino)-oxomethyl]-methylamino]methyl]-5-[(2R)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2H-1,5-benzoxazocin-8-yl]-2-(1-methyl-3-indolyl)acetamide
Lenalidomide
BMS-345541;CC1=CC2=C(C=C1)N=C(C3=NC=C(N23)C)NCCN.Cl
HHDWUYJENPRCSP-UTTPPHFYSA-N
N9-ISOPROPYL-OLOMOUCINE
Brivanib
CHEMBL2058177
Bendamustine
SB-216763
Tanespimycin + Docetaxel (Taxotere)
CIL56
N-[2-methyl-5-[2-oxo-9-(1H-pyrazol-4-yl)-1-benzo[h][1,6]naphthyridinyl]phenyl]-2-propenamide
BX-912
Rigosertib
Tanespimycin + gemcitabine
Neratinib
Parbendazole
SCHEMBL11942935
CHEMBL3183639
BRD-K09587429-001-01-3
ML311
NSC87877
CID5951923
SMR000198998
Selumetinib + GDC-0941
Nsc 23766
CHEMBL2203525
SR1001
N-[[(4R,5R)-2-[(2R)-1-hydroxypropan-2-yl]-4-methyl-8-(4-methylpent-1-ynyl)-1,1-dioxo-4,5-dihydro-3H-6,1$l^{6},2-benzoxathiazocin-5-yl]methyl]-N-methyl-2-pyrazinecarboxamide
importazole
ML-210
Lomeguatrib
BMS-345541;CC1=CC2=C(C=C1)N=C(C3=NC=C(N23)C)NCCN
vinblastine
ERK5-IN-1
temozolomide
AM580
Cdk4/6 Inhibitor IV
Tosedostat
Pluripotin
CHEMBL3185999
chlorambucil
WH-4-023
DMOG
SNX-2112
ZM-447439
PDK1 inhibitor
Idelalisib
Teniposide [USAN]
Wee1 Inhibitor
CHEMBL2356172
CHEMBL24850
N'-[(6-oxo-5-prop-2-enyl-1-cyclohexa-2,4-dienylidene)methyl]-2-[4-(phenylmethyl)-1-piperazinyl]acetohydrazide;C=CCC1=CC=C/C(=C/NNC(=O)CN2CCN(CC2)CC3=CC=CC=C3)/C1=O
Cot inhibitor-2
BRD-A28105619-001-01-3
Olaparib
MIRA-1
TCMDC-123515
Venetoclax
pifithrin
Docetaxel (Taxotere)
cerulenin
JQ1 + UNC0638
SCHEMBL916391
Leptomycin B
PF-4800567
SCHEMBL15422028
Pazopanib
CHEMBL585951
SCHEMBL16479156
Amuvatinib
SCHEMBL6874948
GSK-650394
CI 976
BRD-A15100685-001-01-8
thalidomide
Neopeltolide
BRD-K19103580-001-01-2
ifosfamide
GSK1070916
Z-LLNle-CHO
Navitoclax + Alisertib
GDC-0941
UNC0638 + Selumetinib
Vorinostat + SCHEMBL2671349
niclosamide
CHM-1
nan + Navitoclax(1)
Telomerase Inhibitor IX
EMBELIN
WZ4002
ISX-9
CHEMBL3182697
AC-55649
PI-103
Cabozantinib
AICA ribonucleotide
RSL3
TAK-715
IKK-3 Inhibitor
SCHEMBL12474870
OTKWUBXKTHWZKE-FUOPVMCBSA-N
Pevonedistat
cyclosporin A
MLS001198989
N-[2-methyl-5-[oxo-[3-(1-oxoprop-2-enylamino)-5-(trifluoromethyl)anilino]methyl]phenyl]-5-isoxazolecarboxamide
BRD-K17060750-001-01-0
ZSTK474
GW2580
BRD4770
SB-525334
Vemurafenib + Crizotinib
prochlorperazine
Selisistat
OSI-027;COC1=CC=CC2=CC(=C3C4=C(N=CNN4C(=N3)C5CCC(CC5)C(=O)O)N)N=C21
SCHEMBL18426910
Bosutinib
LSM-36779
SCHEMBL18216694
Tipifarnib
N-[(2S,3S)-2-[(dimethylamino)methyl]-5-[(2R)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2H-1,5-benzoxazocin-10-yl]-2,5-dimethyl-3-pyrazolecarboxamide
CHEMBL520231
vinorelbine
BRD-K62801835-001-01-0
GSK-690693
MGCD-265
AR-42
VX-11e
lfm-a13
Nelarabine
KW-2449
I-BET-762
TG100-115
DFSDBFJUWANYES-UBWKHRTASA-N
SCHEMBL2671349
GSK1904529A
NG25
SCHEMBL12041987
Epothilone B
Trametinib
KX2-391
Ro-3306
AHPN
LSM-13729
Pelitinib
gossypol
XL765
Bortezomib + CAY10603
ABT-737
Belinostat
N-[[(4S,5R)-8-[2-(2-fluorophenyl)ethynyl]-2-[(2S)-1-hydroxypropan-2-yl]-4-methyl-1,1-dioxo-4,5-dihydro-3H-6,1$l^{6},2-benzoxathiazocin-5-yl]methyl]-N-methyl-2-pyridin-4-ylacetamide
UNC0321
Pluripotin + Navitoclax
CH 55
Motesanib
MI-2
GSK4112
Piperlongumine + Telomerase Inhibitor IX
Sepantronium + bromide
Valdecoxib
GNF-2
RAD51 inhibitor B02
SCHEMBL12469828
Veliparib
Bryostatin 1
N-[6-(2-amino-4-fluoroanilino)-6-oxohexyl]-4-methylbenzamide
AZ3146
Obatoclax + METHANESULFONIC ACID
docetaxel
BRD-1240
BMS-509744
BMS-536924;CC1=CC(=CC2=C1N/C(=C\3/C(=CC=NC3=O)NC[C@H](C4=CC(=CC=C4)Cl)O)/N2)N5CCOCC5
AK174031 + MK-1775
Clofarabine
Sunitinib
MK-0752
Vemurafenib
SCHEMBL10183194
Pifithrin-mu
JQ1 + MK-0752
AZD7545
SCHEMBL10436373
C6 ceramide
Nutlin-3
parthenolide;C/C/1=C\CC[C@@]2([C@H](O2)[C@@H]3[C@@H](CC1)C(=C)C(=O)O3)C
GW 441756
Vemurafenib + Navitoclax
Austocystin D
BRD-K33514849-001-01-9
AS-605240
N-[(2R,3S)-2-[[(4-chlorophenyl)sulfonyl-methylamino]methyl]-5-[(2S)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-2,3,4,7-tetrahydro-1,5-benzoxazonin-9-yl]-4,4,4-trifluorobutanamide
Dabrafenib
PHA-793887
N-[3-[[2-[[4-(dimethylamino)cyclohexyl]amino]-9-propan-2-yl-6-purinyl]amino]phenyl]-2-propenamide
CHEMBL2132053
SMER-3
BMS-754807
KU-55933
A-443654
N-[(2S,3S,6R)-2-(hydroxymethyl)-6-[2-(4-methyl-1-piperazinyl)-2-oxoethyl]-3-oxanyl]-1,3-benzodioxole-5-carboxamide
paclitaxel;CC1=C2[C@H](C(=O)[C@@]3([C@H](C[C@@H]4[C@]([C@H]3[C@@H]([C@@](C2(C)C)(C[C@@H]1OC(=O)[C@@H]([C@H](C5=CC=CC=C5)NC(=O)C6=CC=CC=C6)O)O)OC(=O)C7=CC=CC=C7)(CO4)OC(=O)C)O)C)OC(=O)C
dichloroplatinum diammoniate
CCT036477
Sotrastaurin
nutlin-3A
SCHEMBL4463213
GSK1059615
N-[3-(1H-benzimidazol-2-yl)-5-(1-piperazinylmethyl)phenyl]-2-quinoxalinecarboxamide
AT7519
CUDC-101
RG108
simvastatin
NSC207895
Tandutinib
L-685458
Navitoclax + Decitabine
Piperlongumine
isoliquiritigenin
PD153035
NSC373989
CBB1007
Tipifarnib (S enantiomer)
TPCA-1
etoposide
TGX-221
SGX-523
SCHEMBL12182311
5-azacytidine
IU-1
Pyrazolanthrone
GSK-J4
Canertinib
itraconazole
JW74
Alisertib
Imatinib
DNMDP-2
RAF265
Mps1-IN-1 + hydrochloric acid
AZD4547
BRD1172
NVP-BEZ235
dacarbazine
BRD-K35716340-001-01-2
T0901317
1009820-21-6
ML-030
CAY10594
CHS-828
KHS101
SL-0101
CHEMBL515416
BRD-K02492147-001-01-4
Ibrutinib
CHEBI:94110
Tanespimycin
BRD-6929
Navitoclax
BAM7
SCH-79797
CBPNZQVSJQDFBE-RERLVDEVSA-N
Temsirolimus
SCHEMBL15428380
N'-[(6-oxo-5-prop-2-enyl-1-cyclohexa-2,4-dienylidene)methyl]-2-[4-(phenylmethyl)-1-piperazinyl]acetohydrazide;C=CCC1=CC=CC(=CNNC(=O)CN2CCN(CC2)CC3=CC=CC=C3)C1=O
phloretin
Spautin-1
UBRVGBLDXDOETM-UHFFFAOYSA-N
OSI-930
BRD-K20514654-001-01-8
BAY 61-3606 + hydrochloric acid
Fingolimod
S-Trityl-L-cysteine
A-770041
GSK461364
SCHEMBL2586580
Abiraterone
JQ1 + Navitoclax
MI-1
sb 225002
CHEMBL2143553
ethyl 5,5,7,7-tetramethyl-2-(5-nitrothiophene-2-carboxamido)-4,5,6,7-tetrahydrothieno[2,3-c]pyridine-3-carboxylate
Glutaminase C-IN-1
Valisone
GSK429286A
Bortezomib + SNX-2112
JW 480
11-cis Retinoic Acid
GQRREYKSPJMLAW-YGNUMJMVSA-N
BRD3308
Selumetinib + MK-2206
Selumetinib
ouabain
CID-2858522
A-804598
paclitaxel
SCHEMBL16046542
MTLMDZJUGDUTCP-YWEFRBEISA-N
N-[(2R,3S)-5-[(2R)-1-hydroxypropan-2-yl]-3-methyl-2-(methylaminomethyl)-6-oxo-2,3,4,7-tetrahydro-1,5-benzoxazonin-9-yl]methanesulfonamide
Curcumin, Curcuma longa L.
AZ-628
RUCAPARIB
BIX-01294
XHQLYWYICDKTPJ-UHFFFAOYSA-N
Luminespib
CHEMBL258148
AGK2
STF-62247
SCHEMBL15444220
BRD-K04800985-001-01-1
CYCLOPAMINE
SCHEMBL4320913
BRD6708
Midostaurin
LSM-6185
Refametinib
SKI II
SpOx1_002925
Vemurafenib + Selumetinib
Navitoclax + gemcitabine
mitomycin C
Axitinib
PDIPALLOXOFUBU-UHFFFAOYSA-N
parthenolide;C/C/1=C/CC[C@@]2([C@H](O2)[C@@H]3[C@@H](CC1)C(=C)C(=O)O3)C
Tozasertib
TCMDC-125552
Ko 143
Navitoclax + Selumetinib
N-[(2S,3S)-5-[(2R)-1-hydroxypropan-2-yl]-3-methyl-2-[[methyl-[(1-naphthalenylamino)-oxomethyl]amino]methyl]-6-oxo-2,3,4,7-tetrahydro-1,5-benzoxazonin-9-yl]-4-pyridinecarboxamide
YK-4-279
VAF347
EPZ004777 + SCHEMBL2671349
6-BIO
SCHEMBL1710881
UNC0638 + Navitoclax
PCI-34051
BRD-K33199242-001-01-2
NSC 95397
Necrosulfonamide
CHEBI:119735
NSC74859
CGP-60474
Regorafenib
SCHEMBL14934014
PF 750
BRD-A59431241-001-01-1
PF-562271
Bortezomib + nan
YM-201636
Avrainvillamide
CHEMBL2062550
BRD-K29086754-001-01-7
fqi1
BRD-K49456190-001-01-0
BX-795
CHEMBL416418
myricetin
Entinostat
roscovitine
Galunisertib
Skepinone-L
KU-0060648
BIBR 1532
trifluoperazine
GSK269962A
SCHEMBL16273428
METHYLSTAT
Lenvatinib
bms270394
NSC136476
ERASTIN
Fulvestrant
I-BET151
cytarabine
PONATINIB
Bleomycin sulfate
SCHEMBL16296919
N-(2,5-dimethoxyphenyl)sulfonyl-N-(4-methoxyphenyl)benzamide
Obatoclax
Marinopyrrole A
homoharringtonine
BRD-A63646118-001-02-6
tamoxifen
Alectinib
SCHEMBL1914213
SZ4TA2
etoposide + SCHEMBL2671349
BRD-K27986637-001-01-3
CX-5461
N-[[(2S,3S)-8-[2-(1-hydroxycyclopentyl)ethynyl]-5-[(2S)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2H-pyrido[2,3-b][1,5]oxazocin-2-yl]methyl]-N-methyl-4-oxanecarboxamide
ETP-46464
JW-55
CHEMBL2152368
Doramapimod
CHEMBL2398212
CCT007093
CHEMBL2206358
Y-39983
Avagacestat
BRD-K49290616-001-01-9
GSK2636771
KCBBHEKXEHMWFW-YQZFVPMHSA-N
BRD-K50799972-001-01-3
SCHEMBL17821363
ICG-001
SRLVTMSBRCMODY-QXPFVDMISA-N
JQ1 + Vorinostat
N-(4-methoxyphenyl)sulfonyl-N-[2-[2-(1-oxido-4-pyridin-1-iumyl)ethenyl]phenyl]acetamide
SCH-529074 + JNJ-26854165
doxorubicin + Navitoclax
AT7867
Turofexorate isopropyl
Ruxolitinib
Navitoclax + Vorinostat
shikonin
StemRegenin 1
NAPHTHO(2,1-b)FURAN, 1-METHYL-2-NITRO-
FTTYFNWRWDLFLP-UHFFFAOYSA-N
Sorafenib
betulinic acid
Tubastatin A
N-(2-(4-(2-Oxo-2,3-dihydro-1H-benzo[d]imidazol-1-yl)piperidin-1-yl)ethyl)-2-naphthamide
AZD1480
RO4929097
GSK2126458
prima-1
PF-4708671
CYTOCHALASIN B
DASA-58
PHENFORMIN
Bemcentinib
ethyl 4-[4-[(5-nitrofuran-2-yl)methylidene]-3,5-dioxopyrazolidin-1-yl]benzoate
C646
MG-132
CAY10603
SU11274
CHEMBL2180739
LSM-6189
sildenafil
KPT-185
Vorinostat
MLS000106215
GW843682X
Afatinib
Navitoclax + Sorafenib
Navitoclax + Piperlongumine
pyrimethamine
MK-1775
Bortezomib + Tanespimycin
ACHP
Retinoic acid
PLX-4720
N-[(2R,3R)-5-[(2S)-1-hydroxypropan-2-yl]-3-methyl-2-[[methyl-[(1-naphthalenylamino)-oxomethyl]amino]methyl]-6-oxo-3,4-dihydro-2H-1,5-benzoxazocin-10-yl]-4-pyridinecarboxamide
Linsitinib
Masitinib
BIX 02189
Salermide
FTI-277
AS601245
WHI-P97
MLN2480
Oprea1_718426
PF-3758309
retinol + Selumetinib
nan + nan
CCT 018159
PD318088
Nilotinib
Mdivi-1
CHIR-99021
SCHEMBL618594
1,2-Cyclohexanediamine anion + oxalic acid
Vemurafenib + Salermide
salubrinal
SCH-529074
topotecan
SCHEMBL18188080
AZD6482
GW-405833
Istradefylline
bleomycin
SB-431542
NVP-ADW742
Navitoclax + Telomerase Inhibitor IX
Lestaurtinib
SCHEMBL6465274
OQHLPAAWWGDXAW-UHFFFAOYSA-N
KU-60019
CHEMBL3188232
ethyl 2-cyano-3-(3,4-dichlorophenyl)acryloylcarbamate
J3.559.058G
Foretinib
FR-180204
SCHEMBL2085358
azanide; dichloroplatinum(2+)
nan + Navitoclax(2)
Cyt387
NSC48300
PF 184
cyclophosphamide
QS11
BRD-K41597374-001-01-7
JQ1 + Selumetinib
N'-(2-pyrrolylidenemethyl)-2-(2,4,6-trichlorophenoxy)acetohydrazide
AK174031
QL47
BMS614
Ispinesib + METHANESULFONIC ACID
(S)-selisistat
N-[(2R,3S)-5-[(2S)-1-hydroxypropan-2-yl]-3-methyl-2-(methylaminomethyl)-6-oxo-3,4-dihydro-2H-1,5-benzoxazocin-8-yl]cyclohexanecarboxamide
CP-466722
Isonicotinohydroxamic acid
Darinaparsin
Akt inhibitor VIII
Selumetinib + Piperlongumine
CHEMBL2381520
WZ8040
Saracatinib
FHTVASVNEUEMIV-LWSJDIAFSA-N
PF-573228
HC-067047
NU-7441
Ku-0063794
PIK-93
SCHEMBL13833318
(-)-Epigallocatechin gallate
EPZ004777 + Selumetinib
EPZ004777 + Navitoclax
SCHEMBL13737661
N1-[2-(1H-indol-3-yl)ethyl]-N3-pyridin-4-ylbenzene-1,3-diamine
SR8278
Bexarotene
o6-benzylguanine
(-)-Rapamycin
Sepantronium
SCHEMBL12180851
Linifanib
vincristine
Crizotinib
OPAHMANWVUMWAW-GHFZSMQJSA-N
AZD1152-HQPA
Selumetinib + Vorinostat
UNC0638
AZD8055
FH535
Gefitinib
Isoevodiamine
EHT-1864
Erismodegib
Erlotinib
SCHEMBL2066172
LRLWXBHFPGSUOX-HHKXYDNMSA-N
camptothecin
LRRK2-IN-1
CI-1040
NVP-231
Dacinostat
HG6-64-1
VX-702
BRD7880
Quizartinib
AGWAUACRBAQPJJ-UHFFFAOYSA-N
Selumetinib + Decitabine
MLS000571394
Bortezomib
DQNFQTHSDKXSEE-QFZQXZRASA-N
Lapatinib
Aacocf3
doxorubicin
Vismodegib
DBeQ
Sitagliptin
PX-12
CICLOPIROX
BRD-K58306044-001-01-3
CHEBI:94975
BAI1
N-methyl-N-[4-[[6-[[1-(1-oxoprop-2-enyl)-3-piperidinyl]amino]-7H-purin-2-yl]amino]phenyl]propanamide
Vorapaxar
WP-1130
alpelisib
CHEBI:93385
PF-543
NVP-BHG712
Zibotentan
QFJCIRLUMZQUOT-LAOSHSCVSA-N
Indisulam
Flavopiridol
Elesclomol
LE 135
Tivantinib
BRD-A34462049-001-01-0
Decitabine + SCHEMBL2671349
CHEMBL568305
WFYGWJXIPUGUJF-UHFFFAOYSA-N
IC-87114
procarbazine
JQ1
R406 (free base)
Tacedinaline
Vandetanib
Blebbistatin
JNK Inhibitor VIII
Enzastaurin
NVP-BSK805
OSI-027;COC1=CC=CC2=C/C(=C/3\C4=C(N=CNN4C(=N3)C5CCC(CC5)C(=O)O)N)/N=C21
SB-590885
SCHEMBL15422095
PD173074
methotrexate
5-Fluorouracil
ML031
Semagacestat
RITA
CDK9 inhibitor
Dasatinib
BMS-536924;CC1=CC(=CC2=C1NC(=C3C(=CC=NC3=O)NC[C@H](C4=CC(=CC=C4)Cl)O)N2)N5CCOCC5
SCHEMBL13741284
Daporinad
STF-31
Narciclasine
Code
attribution_data_all = attribution_data_all.set_index("GO_term")
Code
attribution_data_all.head()
BRD-K02251932-001-01-3 BRD-K25737009-001-01-2 Nintedanib bicalutamide N-[(2R,3S)-2-[[cyclopropylmethyl(methyl)amino]methyl]-5-[(2R)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2H-1,5-benzoxazocin-8-yl]-1-methyl-4-imidazolesulfonamide PHA-665752 N-cyclopropyl-3-[3-[[cyclopropyl(oxo)methyl]amino]-1H-indazol-6-yl]benzamide Ki8751 IPA-3 FAWUGYGEBHAQBU-PPEXNQRJSA-N ... ML031 Semagacestat RITA CDK9 inhibitor Dasatinib BMS-536924;CC1=CC(=CC2=C1NC(=C3C(=CC=NC3=O)NC[C@H](C4=CC(=CC=C4)Cl)O)N2)N5CCOCC5 SCHEMBL13741284 Daporinad STF-31 Narciclasine
GO_term
GO:0000012_1 -0.006564 -0.005680 0.003188 -0.005863 -0.003410 -0.002951 0.001118 0.002033 0.000799 -0.007842 ... -0.007256 -0.004271 -0.013783 -0.006253 0.002864 0.009604 -0.008099 -0.001475 -0.003698 -0.009866
GO:0000012_2 0.010029 0.011514 0.009892 0.012072 0.005788 0.012909 0.002316 0.009362 -0.011816 0.000166 ... 0.008918 -0.002449 0.017704 0.006732 0.002447 0.006485 0.003888 -0.000569 0.001628 0.017132
GO:0000012_3 0.008466 0.006840 -0.000027 0.006379 0.003082 -0.006110 -0.008877 -0.000347 -0.013084 0.000150 ... -0.006096 0.011308 0.012216 0.000997 0.011521 0.013800 0.002843 0.016328 0.021640 0.003536
GO:0000012_4 0.013018 0.007276 0.010128 0.008622 0.004795 0.006706 0.000874 0.005514 -0.003347 -0.000010 ... -0.003682 0.006544 0.010806 0.003346 0.017556 0.023130 0.001105 0.009710 0.016940 0.014787
GO:0000012_5 -0.007076 -0.006129 -0.007634 -0.003785 -0.004151 -0.007947 -0.008430 -0.006039 -0.002722 0.002163 ... 0.001821 -0.002346 -0.007831 -0.009368 -0.011118 -0.003408 -0.001760 0.003593 -0.000109 -0.020831

5 rows × 684 columns

ChEMBL Drug Target Slim

Code
from chembl_webresource_client.new_client import new_client

Import SparseGO drugs

Code
# Get names
def get_compound_names(file_name):
    """
    Read the compound names stored in a tab-separated text file.

        Parameters
        ----------
        file_name: str
            Path of the file; every line must have at least three tab-separated fields

        Output
        ------
        compounds: list
            Lower-cased third field of every line, in file order (a header line, if any, is included)
    """
    with open(file_name, 'r') as handle:
        # strip the line first, then split on tabs and keep the third column
        return [row.strip().split('\t')[2].lower() for row in handle]
# Load the lower-cased compound names; the first entry is the file's header ('name'), so drop it
names = get_compound_names(computer+"SparseGO_code/data/compound_names.txt")
names.pop(0)
'name'

ChEMBL IDs

Get the ChEMBL IDs of the drugs, if available (there are always 684 drugs, so the compounds2ids object can be reused)

Code
# Get all chembl IDs -- slow, every drug needs at least one web query
molecule = new_client.molecule


def _first_chembl_id(**query):
    """Return the 'molecule_chembl_id' of the first molecule matching the filter, or None if there is no hit."""
    hits = list(molecule.filter(**query).only('molecule_chembl_id'))
    return hits[0]['molecule_chembl_id'] if len(hits) > 0 else None


compounds2ids = {}
for i, drug in enumerate(names):

    if " + " in drug:
        # drug combination: look up both components by preferred name only
        first_name, second_name = drug.split(" + ", 1)
        first_id = _first_chembl_id(pref_name__iexact=first_name)
        second_id = _first_chembl_id(pref_name__iexact=second_name)

        if first_id is not None and second_id is not None:
            compounds2ids[drug] = [first_id, second_id]
        elif first_id is not None:
            compounds2ids[drug] = first_id
        elif second_id is not None:
            compounds2ids[drug] = second_id
        else:
            print(drug, i)
        continue

    # single drug: try the lookups in order and stop at the first one with a hit
    lookups = (
        {'pref_name__iexact': drug},
        {'chembl_id': drug},  # for drugs that have the chembl ID as the name!!
        {'molecule_synonyms__molecule_synonym__iexact': drug},  # in case it is not found by pref_name
    )
    for lookup in lookups:
        found_id = _first_chembl_id(**lookup)
        if found_id is not None:
            compounds2ids[drug] = found_id
            break
    else:
        print(drug, i)
# 341 chembl IDs were found (october 31 2022)
brd-k02251932-001-01-3 0
brd-k25737009-001-01-2 1
n-[(2r,3s)-2-[[cyclopropylmethyl(methyl)amino]methyl]-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2h-1,5-benzoxazocin-8-yl]-1-methyl-4-imidazolesulfonamide 4
n-cyclopropyl-3-[3-[[cyclopropyl(oxo)methyl]amino]-1h-indazol-6-yl]benzamide 6
ki8751 7
ipa-3 8
fawugygebhaqbu-ppexnqrjsa-n 9
brd-k05870596-001-01-4 11
zinc113660258 15
nsc60043 16
wpttvjltnawyao-cdypjpissa-n 17
schembl2139153 20
hms1361j12 22
n-[(2r,3s)-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-2-(methylaminomethyl)-6-oxo-2,3,4,7-tetrahydro-1,5-benzoxazonin-9-yl]-3-(4-morpholinyl)propanamide 23
fgin-1-27 24
brd-k30019337-001-01-1 25
mkwlqyduwjbeku-lwsjdiafsa-n 32
16beta-bromoandrosterone 33
brd-k16147474-001-01-1 34
schembl13833463 37
brd-k53792571-003-01-6 42
jq1 + schembl2671349 43
mnulegdcpyonbu-pamdcedjsa-n 44
ly2183240 46
unc-0638 + schembl2671349 49
brd-k52037352-001-01-6 55
rsk inhibitor fmk 63
n-[(2s,3s)-2-[[[(cyclohexylamino)-oxomethyl]-methylamino]methyl]-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2h-1,5-benzoxazocin-8-yl]-2-(1-methyl-3-indolyl)acetamide 64
bms-345541;cc1=cc2=c(c=c1)n=c(c3=nc=c(n23)c)nccn.cl 66
hhdwuyjenprcsp-uttpphfysa-n 67
n9-isopropyl-olomoucine 68
cil56 74
n-[2-methyl-5-[2-oxo-9-(1h-pyrazol-4-yl)-1-benzo[h][1,6]naphthyridinyl]phenyl]-2-propenamide 75
bx-912 76
schembl11942935 81
brd-k09587429-001-01-3 83
ml311 84
nsc87877 85
cid5951923 86
smr000198998 87
nsc 23766 89
sr1001 91
n-[[(4r,5r)-2-[(2r)-1-hydroxypropan-2-yl]-4-methyl-8-(4-methylpent-1-ynyl)-1,1-dioxo-4,5-dihydro-3h-6,1$l^{6},2-benzoxathiazocin-5-yl]methyl]-n-methyl-2-pyrazinecarboxamide 92
importazole 93
ml-210 94
bms-345541;cc1=cc2=c(c=c1)n=c(c3=nc=c(n23)c)nccn 96
erk5-in-1 98
cdk4/6 inhibitor iv 101
pluripotin 103
wh-4-023 106
dmog 107
pdk1 inhibitor 110
teniposide [usan] 112
wee1 inhibitor 113
n'-[(6-oxo-5-prop-2-enyl-1-cyclohexa-2,4-dienylidene)methyl]-2-[4-(phenylmethyl)-1-piperazinyl]acetohydrazide;c=ccc1=cc=c/c(=c/nnc(=o)cn2ccn(cc2)cc3=cc=cc=c3)/c1=o 116
cot inhibitor-2 117
brd-a28105619-001-01-3 118
mira-1 120
pifithrin 123
docetaxel (taxotere) 124
jq1 + unc0638 126
schembl916391 127
schembl15422028 130
schembl16479156 133
schembl6874948 135
ci 976 137
brd-a15100685-001-01-8 138
brd-k19103580-001-01-2 141
z-llnle-cho 144
chm-1 150
nan + navitoclax(1) 151
telomerase inhibitor ix 152
wz4002 154
isx-9 155
aica ribonucleotide 160
rsl3 161
ikk-3 inhibitor 163
schembl12474870 164
otkwubxkthwzke-fuopvmcbsa-n 165
mls001198989 168
n-[2-methyl-5-[oxo-[3-(1-oxoprop-2-enylamino)-5-(trifluoromethyl)anilino]methyl]phenyl]-5-isoxazolecarboxamide 169
brd-k17060750-001-01-0 170
brd4770 173
selisistat 177
osi-027;coc1=cc=cc2=cc(=c3c4=c(n=cnn4c(=n3)c5ccc(cc5)c(=o)o)n)n=c21 178
schembl18426910 179
lsm-36779 181
schembl18216694 182
n-[(2s,3s)-2-[(dimethylamino)methyl]-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2h-1,5-benzoxazocin-10-yl]-2,5-dimethyl-3-pyrazolecarboxamide 184
brd-k62801835-001-01-0 187
vx-11e 191
i-bet-762 195
dfsdbfjuwanyes-ubwkhrtasa-n 197
schembl2671349 198
gsk1904529a 199
ng25 200
schembl12041987 201
ahpn 206
lsm-13729 207
n-[[(4s,5r)-8-[2-(2-fluorophenyl)ethynyl]-2-[(2s)-1-hydroxypropan-2-yl]-4-methyl-1,1-dioxo-4,5-dihydro-3h-6,1$l^{6},2-benzoxathiazocin-5-yl]methyl]-n-methyl-2-pyridin-4-ylacetamide 214
unc0321 215
ch 55 217
mi-2 219
piperlongumine + telomerase inhibitor ix 221
gnf-2 224
rad51 inhibitor b02 225
schembl12469828 226
n-[6-(2-amino-4-fluoroanilino)-6-oxohexyl]-4-methylbenzamide 229
az3146 230
brd-1240 233
bms-536924;cc1=cc(=cc2=c1n/c(=c\3/c(=cc=nc3=o)nc[c@h](c4=cc(=cc=c4)cl)o)/n2)n5ccocc5 235
ak174031 + mk-1775 236
schembl10183194 241
azd7545 244
schembl10436373 245
parthenolide;c/c/1=c\cc[c@@]2([c@h](o2)[c@@h]3[c@@h](cc1)c(=c)c(=o)o3)c 248
gw 441756 249
brd-k33514849-001-01-9 252
n-[(2r,3s)-2-[[(4-chlorophenyl)sulfonyl-methylamino]methyl]-5-[(2s)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-2,3,4,7-tetrahydro-1,5-benzoxazonin-9-yl]-4,4,4-trifluorobutanamide 254
n-[3-[[2-[[4-(dimethylamino)cyclohexyl]amino]-9-propan-2-yl-6-purinyl]amino]phenyl]-2-propenamide 257
smer-3 259
n-[(2s,3s,6r)-2-(hydroxymethyl)-6-[2-(4-methyl-1-piperazinyl)-2-oxoethyl]-3-oxanyl]-1,3-benzodioxole-5-carboxamide 263
paclitaxel;cc1=c2[c@h](c(=o)[c@@]3([c@h](c[c@@h]4[c@]([c@h]3[c@@h]([c@@](c2(c)c)(c[c@@h]1oc(=o)[c@@h]([c@h](c5=cc=cc=c5)nc(=o)c6=cc=cc=c6)o)o)oc(=o)c7=cc=cc=c7)(co4)oc(=o)c)o)c)oc(=o)c 264
dichloroplatinum diammoniate 265
cct036477 266
nutlin-3a 268
schembl4463213 269
n-[3-(1h-benzimidazol-2-yl)-5-(1-piperazinylmethyl)phenyl]-2-quinoxalinecarboxamide 271
rg108 274
nsc207895 276
piperlongumine 280
pd153035 282
nsc373989 283
cbb1007 284
tipifarnib (s enantiomer) 285
tpca-1 286
tgx-221 288
schembl12182311 290
5-azacytidine 291
iu-1 292
pyrazolanthrone 293
gsk-j4 294
jw74 297
dnmdp-2 300
brd1172 304
brd-k35716340-001-01-2 307
1009820-21-6 309
ml-030 310
cay10594 311
khs101 313
brd-k02492147-001-01-4 316
chebi:94110 318
brd-6929 320
bam7 322
sch-79797 323
cbpnzqvsjqdfbe-rerlvdevsa-n 324
schembl15428380 326
n'-[(6-oxo-5-prop-2-enyl-1-cyclohexa-2,4-dienylidene)methyl]-2-[4-(phenylmethyl)-1-piperazinyl]acetohydrazide;c=ccc1=cc=cc(=cnnc(=o)cn2ccn(cc2)cc3=cc=cc=c3)c1=o 327
spautin-1 329
ubrvgbldxdoetm-uhfffaoysa-n 330
brd-k20514654-001-01-8 332
gsk461364 337
schembl2586580 338
mi-1 341
sb 225002 342
ethyl 5,5,7,7-tetramethyl-2-(5-nitrothiophene-2-carboxamido)-4,5,6,7-tetrahydrothieno[2,3-c]pyridine-3-carboxylate 344
glutaminase c-in-1 345
gsk429286a 347
jw 480 349
11-cis retinoic acid 350
gqrreykspjmlaw-ygnumjmvsa-n 351
brd3308 352
cid-2858522 356
schembl16046542 359
mtlmdzjugdutcp-ywefrbeisa-n 360
n-[(2r,3s)-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-2-(methylaminomethyl)-6-oxo-2,3,4,7-tetrahydro-1,5-benzoxazonin-9-yl]methanesulfonamide 361
curcumin, curcuma longa l. 362
az-628 363
xhqlywyicdktpj-uhfffaoysa-n 366
agk2 369
stf-62247 370
schembl15444220 371
brd-k04800985-001-01-1 372
schembl4320913 374
brd6708 375
lsm-6185 377
ski ii 379
spox1_002925 380
pdipalloxofubu-uhfffaoysa-n 385
parthenolide;c/c/1=c/cc[c@@]2([c@h](o2)[c@@h]3[c@@h](cc1)c(=c)c(=o)o3)c 386
ko 143 389
n-[(2s,3s)-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-2-[[methyl-[(1-naphthalenylamino)-oxomethyl]amino]methyl]-6-oxo-2,3,4,7-tetrahydro-1,5-benzoxazonin-9-yl]-4-pyridinecarboxamide 391
yk-4-279 392
vaf347 393
epz004777 + schembl2671349 394
6-bio 395
schembl1710881 396
pci-34051 398
brd-k33199242-001-01-2 399
nsc 95397 400
necrosulfonamide 401
chebi:119735 402
nsc74859 403
schembl14934014 406
pf 750 407
brd-a59431241-001-01-1 408
ym-201636 411
brd-k29086754-001-01-7 414
fqi1 415
brd-k49456190-001-01-0 416
bibr 1532 425
gsk269962a 427
schembl16273428 428
methylstat 429
bms270394 431
nsc136476 432
i-bet151 435
schembl16296919 439
n-(2,5-dimethoxyphenyl)sulfonyl-n-(4-methoxyphenyl)benzamide 440
marinopyrrole a 442
brd-a63646118-001-02-6 444
schembl1914213 447
sz4ta2 448
brd-k27986637-001-01-3 450
n-[[(2s,3s)-8-[2-(1-hydroxycyclopentyl)ethynyl]-5-[(2s)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2h-pyrido[2,3-b][1,5]oxazocin-2-yl]methyl]-n-methyl-4-oxanecarboxamide 452
etp-46464 453
jw-55 454
cct007093 458
brd-k49290616-001-01-9 462
kcbbhekxehmwfw-yqzfvpmhsa-n 464
brd-k50799972-001-01-3 465
schembl17821363 466
icg-001 467
srlvtmsbrcmody-qxpfvdmisa-n 468
n-(4-methoxyphenyl)sulfonyl-n-[2-[2-(1-oxido-4-pyridin-1-iumyl)ethenyl]phenyl]acetamide 470
sch-529074 + jnj-26854165 471
at7867 473
stemregenin 1 478
naphtho(2,1-b)furan, 1-methyl-2-nitro- 479
fttyfnwrwdlflp-uhfffaoysa-n 480
n-(2-(4-(2-oxo-2,3-dihydro-1h-benzo[d]imidazol-1-yl)piperidin-1-yl)ethyl)-2-naphthamide 484
prima-1 488
pf-4708671 489
dasa-58 491
ethyl 4-[4-[(5-nitrofuran-2-yl)methylidene]-3,5-dioxopyrazolidin-1-yl]benzoate 494
c646 495
cay10603 497
su11274 498
lsm-6189 500
kpt-185 502
mls000106215 504
achp 512
n-[(2r,3r)-5-[(2s)-1-hydroxypropan-2-yl]-3-methyl-2-[[methyl-[(1-naphthalenylamino)-oxomethyl]amino]methyl]-6-oxo-3,4-dihydro-2h-1,5-benzoxazocin-10-yl]-4-pyridinecarboxamide 515
bix 02189 518
fti-277 520
as601245 521
mln2480 523
oprea1_718426 524
nan + nan 527
cct 018159 528
pd318088 529
mdivi-1 531
schembl618594 533
schembl18188080 539
gw-405833 541
nvp-adw742 545
schembl6465274 548
oqhlpaawwgdxaw-uhfffaoysa-n 549
ku-60019 550
ethyl 2-cyano-3-(3,4-dichlorophenyl)acryloylcarbamate 552
j3.559.058g 553
schembl2085358 556
azanide; dichloroplatinum(2+) 557
nan + navitoclax(2) 558
nsc48300 560
pf 184 561
qs11 563
brd-k41597374-001-01-7 564
n'-(2-pyrrolylidenemethyl)-2-(2,4,6-trichlorophenoxy)acetohydrazide 566
ak174031 567
ql47 568
bms614 569
(s)-selisistat 571
n-[(2r,3s)-5-[(2s)-1-hydroxypropan-2-yl]-3-methyl-2-(methylaminomethyl)-6-oxo-3,4-dihydro-2h-1,5-benzoxazocin-8-yl]cyclohexanecarboxamide 572
cp-466722 573
isonicotinohydroxamic acid 574
akt inhibitor viii 576
wz8040 579
fhtvasvneuemiv-lwsjdiafsa-n 581
pf-573228 582
hc-067047 583
ku-0063794 585
pik-93 586
schembl13833318 587
schembl13737661 591
n1-[2-(1h-indol-3-yl)ethyl]-n3-pyridin-4-ylbenzene-1,3-diamine 592
sr8278 593
(-)-rapamycin 596
schembl12180851 598
opahmanwvumwaw-ghfzsmqjsa-n 602
azd1152-hqpa 603
unc0638 605
fh535 607
isoevodiamine 609
eht-1864 610
schembl2066172 613
lrlwxbhfpgsuox-hhkxydnmsa-n 614
lrrk2-in-1 616
nvp-231 618
hg6-64-1 620
brd7880 622
agwauacrbaqpjj-uhfffaoysa-n 624
mls000571394 626
dqnfqthsdkxsee-qfzqxzrasa-n 628
aacocf3 630
dbeq 633
brd-k58306044-001-01-3 637
chebi:94975 638
bai1 639
n-methyl-n-[4-[[6-[[1-(1-oxoprop-2-enyl)-3-piperidinyl]amino]-7h-purin-2-yl]amino]phenyl]propanamide 640
wp-1130 642
chebi:93385 644
pf-543 645
nvp-bhg712 646
qfjcirlumzquot-laoshscvsa-n 648
le 135 652
brd-a34462049-001-01-0 654
wfygwjxipugujf-uhfffaoysa-n 657
ic-87114 658
jq1 660
r406 (free base) 661
blebbistatin 664
jnk inhibitor viii 665
nvp-bsk805 667
osi-027;coc1=cc=cc2=c/c(=c/3\c4=c(n=cnn4c(=n3)c5ccc(cc5)c(=o)o)n)/n=c21 668
schembl15422095 670
pd173074 671
ml031 674
rita 676
cdk9 inhibitor 677
bms-536924;cc1=cc(=cc2=c1nc(=c3c(=cc=nc3=o)nc[c@h](c4=cc(=cc=c4)cl)o)n2)n5ccocc5 679
schembl13741284 680
stf-31 682
Code
# manually add 6 more (names the automatic lookup printed as not found)
compounds2ids.update({
    "teniposide [usan]": "CHEMBL452231",
    "docetaxel (taxotere)": "CHEMBL92",
    "nan + navitoclax(1)": "CHEMBL443684",
    "nan + navitoclax(2)": "CHEMBL443684",
    "osi-027;coc1=cc=cc2=cc(=c3c4=c(n=cnn4c(=n3)c5ccc(cc5)c(=o)o)n)n=c21": "CHEMBL3120215",
    "paclitaxel;cc1=c2[c@h](c(=o)[c@@]3([c@h](c[c@@h]4[c@]([c@h]3[c@@h]([c@@](c2(c)c)(c[c@@h]1oc(=o)[c@@h]([c@h](c5=cc=cc=c5)nc(=o)c6=cc=cc=c6)o)o)oc(=o)c7=cc=cc=c7)(co4)oc(=o)c)o)c)oc(=o)c": "CHEMBL428647",
})
Code
len(compounds2ids)
347

ChEMBL MoA (targets)

Get the molecule targets of each drug (if available)

Code
# one (initially empty) set of target IDs per drug
compounds2targets = {drug: set() for drug in compounds2ids}

chembl_ids = list(compounds2ids.values()) # Chembl IDs of drugs

for drug, drug_ids in compounds2ids.items():
    # the mechanism records link a parent molecule to its targets
    activities = new_client.mechanism.filter(parent_molecule_chembl_id__in=drug_ids).only(
        ['parent_molecule_chembl_id', 'target_chembl_id'])
    # keep only the target ChEMBL ID of every record (may be None)
    compounds2targets[drug].update(act['target_chembl_id'] for act in activities)
    print(drug)
# We now know all targets for some drug
nintedanib
bicalutamide
pha-665752
retinol + schembl2671349
onalespib
cediranib
vemurafenib + erlotinib
brefeldin a
azd7762
staurosporine
nvp-tae684
apicidin
purmorphamine
decitabine
tivozanib
mk-2206
palbociclib
thapsigargin
sns-032
osu-03012
necrostatin-1
bi 2536
gemcitabine
chembl436817
serdemetan
ixazomib
panobinostat
cimetidine
mirdametinib
tacrolimus
cp-724714
dinaciclib
gdc-0879
tw-37
fedratinib
retinol + navitoclax
palmostatin b
lenalidomide
brivanib
chembl2058177
bendamustine
sb-216763
tanespimycin + docetaxel (taxotere)
rigosertib
tanespimycin + gemcitabine
neratinib
parbendazole
chembl3183639
selumetinib + gdc-0941
chembl2203525
lomeguatrib
vinblastine
temozolomide
am580
tosedostat
chembl3185999
chlorambucil
snx-2112
zm-447439
idelalisib
chembl2356172
chembl24850
olaparib
tcmdc-123515
venetoclax
cerulenin
leptomycin b
pf-4800567
pazopanib
chembl585951
amuvatinib
gsk-650394
thalidomide
neopeltolide
ifosfamide
gsk1070916
navitoclax + alisertib
gdc-0941
unc0638 + selumetinib
vorinostat + schembl2671349
niclosamide
embelin
chembl3182697
ac-55649
pi-103
cabozantinib
tak-715
pevonedistat
cyclosporin a
zstk474
gw2580
sb-525334
vemurafenib + crizotinib
prochlorperazine
bosutinib
tipifarnib
chembl520231
vinorelbine
gsk-690693
mgcd-265
ar-42
lfm-a13
nelarabine
kw-2449
tg100-115
epothilone b
trametinib
kx2-391
ro-3306
pelitinib
gossypol
xl765
bortezomib + cay10603
abt-737
belinostat
pluripotin + navitoclax
motesanib
gsk4112
sepantronium + bromide
valdecoxib
veliparib
bryostatin 1
obatoclax + methanesulfonic acid
docetaxel
bms-509744
clofarabine
sunitinib
mk-0752
vemurafenib
pifithrin-mu
jq1 + mk-0752
c6 ceramide
nutlin-3
vemurafenib + navitoclax
austocystin d
as-605240
dabrafenib
pha-793887
chembl2132053
bms-754807
ku-55933
a-443654
sotrastaurin
gsk1059615
at7519
cudc-101
simvastatin
tandutinib
l-685458
navitoclax + decitabine
isoliquiritigenin
etoposide
sgx-523
canertinib
itraconazole
alisertib
imatinib
raf265
mps1-in-1 + hydrochloric acid
azd4547
nvp-bez235
dacarbazine
t0901317
chs-828
sl-0101
chembl515416
ibrutinib
tanespimycin
navitoclax
temsirolimus
phloretin
osi-930
bay 61-3606 + hydrochloric acid
fingolimod
s-trityl-l-cysteine
a-770041
abiraterone
jq1 + navitoclax
chembl2143553
valisone
bortezomib + snx-2112
selumetinib + mk-2206
selumetinib
ouabain
a-804598
paclitaxel
rucaparib
bix-01294
luminespib
chembl258148
cyclopamine
midostaurin
refametinib
vemurafenib + selumetinib
navitoclax + gemcitabine
mitomycin c
axitinib
tozasertib
tcmdc-125552
navitoclax + selumetinib
unc0638 + navitoclax
cgp-60474
regorafenib
pf-562271
bortezomib + nan
avrainvillamide
chembl2062550
bx-795
chembl416418
myricetin
entinostat
roscovitine
galunisertib
skepinone-l
ku-0060648
trifluoperazine
lenvatinib
erastin
fulvestrant
cytarabine
ponatinib
bleomycin sulfate
obatoclax
homoharringtonine
tamoxifen
alectinib
etoposide + schembl2671349
cx-5461
chembl2152368
doramapimod
chembl2398212
chembl2206358
y-39983
avagacestat
gsk2636771
jq1 + vorinostat
doxorubicin + navitoclax
turofexorate isopropyl
ruxolitinib
navitoclax + vorinostat
shikonin
sorafenib
betulinic acid
tubastatin a
azd1480
ro4929097
gsk2126458
cytochalasin b
phenformin
bemcentinib
mg-132
chembl2180739
sildenafil
vorinostat
gw843682x
afatinib
navitoclax + sorafenib
navitoclax + piperlongumine
pyrimethamine
mk-1775
bortezomib + tanespimycin
retinoic acid
plx-4720
linsitinib
masitinib
salermide
whi-p97
pf-3758309
retinol + selumetinib
nilotinib
chir-99021
1,2-cyclohexanediamine anion + oxalic acid
vemurafenib + salermide
salubrinal
sch-529074
topotecan
azd6482
istradefylline
bleomycin
sb-431542
navitoclax + telomerase inhibitor ix
lestaurtinib
chembl3188232
foretinib
fr-180204
cyt387
cyclophosphamide
jq1 + selumetinib
ispinesib + methanesulfonic acid
darinaparsin
selumetinib + piperlongumine
chembl2381520
saracatinib
nu-7441
(-)-epigallocatechin gallate
epz004777 + selumetinib
epz004777 + navitoclax
bexarotene
o6-benzylguanine
sepantronium
linifanib
vincristine
crizotinib
selumetinib + vorinostat
azd8055
gefitinib
erismodegib
erlotinib
camptothecin
ci-1040
dacinostat
vx-702
quizartinib
selumetinib + decitabine
bortezomib
lapatinib
doxorubicin
vismodegib
sitagliptin
px-12
ciclopirox
vorapaxar
alpelisib
zibotentan
indisulam
flavopiridol
elesclomol
tivantinib
decitabine + schembl2671349
chembl568305
procarbazine
tacedinaline
vandetanib
enzastaurin
sb-590885
methotrexate
5-fluorouracil
semagacestat
dasatinib
daporinad
narciclasine
teniposide [usan]
docetaxel (taxotere)
nan + navitoclax(1)
nan + navitoclax(2)
osi-027;coc1=cc=cc2=cc(=c3c4=c(n=cnn4c(=n3)c5ccc(cc5)c(=o)o)n)n=c21
paclitaxel;cc1=c2[c@h](c(=o)[c@@]3([c@h](c[c@@h]4[c@]([c@h]3[c@@h]([c@@](c2(c)c)(c[c@@h]1oc(=o)[c@@h]([c@h](c5=cc=cc=c5)nc(=o)c6=cc=cc=c6)o)o)oc(=o)c7=cc=cc=c7)(co4)oc(=o)c)o)c)oc(=o)c
Code
# Keep only the drugs that have at least one target that is not None
compounds2targets = {
    drug: targets
    for drug, targets in compounds2targets.items()
    if any(target is not None for target in targets)
}
# 220 drugs have annotated drug targets in the recorded run (see the length printed below)
Code
len(compounds2targets)
220

Drug slim GO terms

Get the GO terms of each target

Code
# Get the GO terms of each target
# Result: compounds_GOterms[drug] is a dataframe with one row per cross reference of the
# drug's targets; the columns follow the key order of the xref records plus the target ID
# as last column (later cells read column 0 as the term ID and column 2 as the xref source).
compounds_GOterms = {}
for compound, targets in compounds2targets.items():
    GOterms_list = []

    for target in targets:
        if target is None:
            # the target sets may still contain None next to real IDs (the filtering cell
            # only requires one non-None entry); None is not a target that can be looked up
            continue

        target_records = list(new_client.target.filter(target_chembl_id=target).only(['target_components']).only(['target_components_xrefs']))
        if not target_records:
            # nothing returned for this ID: skip it instead of failing on [0]
            continue

        # not all targets have annotated go_terms: the component list can be empty
        for component in target_records[0]['target_components']:
            xrefs = pd.DataFrame(component['target_component_xrefs'])
            # add target ID to dataframe as an extra trailing column
            target_column = pd.Series([target] * len(xrefs), dtype=object)
            GOterms_list += pd.concat([xrefs, target_column], axis=1).values.tolist()

    compounds_GOterms[compound] = pd.DataFrame(GOterms_list).drop_duplicates()
    print(compound)
nintedanib
bicalutamide
onalespib
cediranib
vemurafenib + erlotinib
azd7762
decitabine
tivozanib
mk-2206
palbociclib
sns-032
osu-03012
bi 2536
gemcitabine
ixazomib
panobinostat
cimetidine
mirdametinib
tacrolimus
cp-724714
dinaciclib
fedratinib
retinol + navitoclax
lenalidomide
brivanib
bendamustine
tanespimycin + docetaxel (taxotere)
tanespimycin + gemcitabine
neratinib
selumetinib + gdc-0941
vinblastine
temozolomide
tosedostat
chlorambucil
idelalisib
olaparib
venetoclax
pazopanib
amuvatinib
thalidomide
ifosfamide
gsk1070916
navitoclax + alisertib
gdc-0941
unc0638 + selumetinib
vorinostat + schembl2671349
niclosamide
cabozantinib
tak-715
pevonedistat
cyclosporin a
zstk474
vemurafenib + crizotinib
prochlorperazine
bosutinib
tipifarnib
vinorelbine
gsk-690693
nelarabine
kw-2449
tg100-115
epothilone b
trametinib
kx2-391
pelitinib
xl765
bortezomib + cay10603
belinostat
pluripotin + navitoclax
motesanib
valdecoxib
veliparib
obatoclax + methanesulfonic acid
docetaxel
clofarabine
sunitinib
vemurafenib
vemurafenib + navitoclax
dabrafenib
pha-793887
bms-754807
sotrastaurin
gsk1059615
at7519
cudc-101
simvastatin
tandutinib
navitoclax + decitabine
etoposide
sgx-523
canertinib
alisertib
imatinib
raf265
azd4547
nvp-bez235
dacarbazine
ibrutinib
tanespimycin
navitoclax
temsirolimus
osi-930
fingolimod
abiraterone
jq1 + navitoclax
valisone
bortezomib + snx-2112
selumetinib + mk-2206
selumetinib
paclitaxel
rucaparib
luminespib
midostaurin
refametinib
vemurafenib + selumetinib
navitoclax + gemcitabine
mitomycin c
axitinib
tozasertib
navitoclax + selumetinib
unc0638 + navitoclax
regorafenib
pf-562271
bortezomib + nan
entinostat
roscovitine
galunisertib
trifluoperazine
lenvatinib
fulvestrant
cytarabine
ponatinib
obatoclax
tamoxifen
alectinib
etoposide + schembl2671349
doramapimod
y-39983
avagacestat
gsk2636771
jq1 + vorinostat
doxorubicin + navitoclax
turofexorate isopropyl
ruxolitinib
navitoclax + vorinostat
sorafenib
azd1480
ro4929097
gsk2126458
bemcentinib
sildenafil
vorinostat
afatinib
navitoclax + sorafenib
navitoclax + piperlongumine
pyrimethamine
mk-1775
bortezomib + tanespimycin
retinoic acid
linsitinib
masitinib
pf-3758309
retinol + selumetinib
nilotinib
vemurafenib + salermide
topotecan
azd6482
istradefylline
bleomycin
navitoclax + telomerase inhibitor ix
lestaurtinib
foretinib
cyt387
cyclophosphamide
jq1 + selumetinib
ispinesib + methanesulfonic acid
selumetinib + piperlongumine
saracatinib
epz004777 + selumetinib
epz004777 + navitoclax
bexarotene
linifanib
vincristine
crizotinib
selumetinib + vorinostat
azd8055
gefitinib
erismodegib
erlotinib
ci-1040
vx-702
quizartinib
selumetinib + decitabine
bortezomib
lapatinib
doxorubicin
vismodegib
sitagliptin
px-12
ciclopirox
vorapaxar
alpelisib
zibotentan
flavopiridol
tivantinib
decitabine + schembl2671349
procarbazine
tacedinaline
vandetanib
enzastaurin
methotrexate
5-fluorouracil
semagacestat
dasatinib
teniposide [usan]
docetaxel (taxotere)
nan + navitoclax(1)
nan + navitoclax(2)
osi-027;coc1=cc=cc2=cc(=c3c4=c(n=cnn4c(=n3)c5ccc(cc5)c(=o)o)n)n=c21
paclitaxel;cc1=c2[c@h](c(=o)[c@@]3([c@h](c[c@@h]4[c@]([c@h]3[c@@h]([c@@](c2(c)c)(c[c@@h]1oc(=o)[c@@h]([c@h](c5=cc=cc=c5)nc(=o)c6=cc=cc=c6)o)o)oc(=o)c7=cc=cc=c7)(co4)oc(=o)c)o)c)oc(=o)c
Code
len(compounds_GOterms)
220
Code
# we have 206 annotated drugs on CHEMBL
Code
# add GO terms found in CTRPv2
Code
# Table of extra GO term annotations; the cell that merges it reads its "Drug" and "Field" columns
CTRPv2_terms = pd.read_excel('ctrp_goterms_drugs.xlsx')  
Code
# add GO terms of drugs with or without annotations
for drug in CTRPv2_terms["Drug"].unique():
    # some drugs had no previous data (no annotations from chembl): start from an empty dataframe
    annotated = compounds_GOterms.get(drug, pd.DataFrame())

    # one single-row dataframe per term, laid out like the chembl rows:
    # column 0 = GO term, column 2 = "GoProcess", columns 1 and 3 left empty
    drug_fields = CTRPv2_terms.loc[CTRPv2_terms["Drug"] == drug, "Field"]
    new_rows = [pd.DataFrame([term, "", "GoProcess", ""]).transpose() for term in drug_fields]

    # concatenate once per drug instead of once per term
    compounds_GOterms[drug] = pd.concat([annotated, *new_rows]).drop_duplicates()

# now we have 233 annotated drugs
Code
# Delete drugs with no GOterms (some targets have no annotated GO terms):
# only dataframes with at least one row are kept
compounds_GOterms = {
    drug: terms_df
    for drug, terms_df in compounds_GOterms.items()
    if len(terms_df) > 0
}
Code
len(compounds_GOterms)
236

Match GO terms

Find all matching terms, i.e. the terms that are part of both the SparseGO graph and the drug slim results…

Code
def load_ontology_extra_output(ontology_file, gene2id_mapping):
    """
    Creates the directed graph of the GO terms and stores the connected elements in arrays.

        Parameters
        ----------
        ontology_file: str
            Path of a text file with three whitespace-separated fields per line.
            If the third field is 'default', the line is an edge between two terms
            (first field -> second field); otherwise it links a term (first field)
            to a gene (second field).

        gene2id_mapping: dict
            Maps gene names to ids; genes that are not in it are printed and skipped

        Output
        ------
        dG: networkx.classes.digraph.DiGraph
            Directed graph of all terms

        terms_pairs: numpy.ndarray
            Store the connection between a term and a term

        genes_terms_pairs: numpy.ndarray
            Store the connection between a gene and a term

        term_direct_gene_map: dict
            For every term with directly annotated genes, the set of their ids

        term_size_map: dict
            For every term, the number of genes annotated to it or to any of its descendants

        The process is terminated with sys.exit(1) if a term has no genes, if there is more
        than one root, or if the graph has more than one connected component.
    """

    dG = nx.DiGraph() # Directed graph class

    terms_pairs = [] # store the pairs between a term and a term
    genes_terms_pairs = [] # store the pairs between a gene and a term

    gene_set = set() # names of all the genes that were kept
    term_direct_gene_map = {}
    term_size_map = {}

    # the context manager closes the file even if a malformed line raises
    with open(ontology_file) as file_handle: # file that has genes and go terms
        for line in file_handle:

            line = line.rstrip().split() # delete spaces and transform to list, line has 3 elements

            if line[2] == 'default': # term-term line: connect both terms in the graph
                dG.add_edge(line[0], line[1])
                terms_pairs.append([line[0], line[1]])
                continue

            # term-gene line
            if line[1] not in gene2id_mapping: # skip genes that are not part of gene2id_mapping
                print(line[1])
                continue

            genes_terms_pairs.append([line[0], line[1]])

            # create the set of the term on first use, then add the id of the gene
            term_direct_gene_map.setdefault(line[0], set()).add(gene2id_mapping[line[1]])

            gene_set.add(line[1])

    terms_pairs = np.array(terms_pairs) # convert to 2d array
    genes_terms_pairs = np.array(genes_terms_pairs) # convert to 2d array

    print('There are', len(gene_set), 'genes')

    for term in dG.nodes():

        # copy the genes directly connected to the term, so the map itself is never modified
        term_gene_set = set(term_direct_gene_map.get(term, ()))

        # add the genes of all the descendants of the term
        for child in nxadag.descendants(dG, term):
            term_gene_set |= term_direct_gene_map.get(child, set())

        if len(term_gene_set) == 0:
            print('There is empty terms, please delete term:', term)
            sys.exit(1)

        term_size_map[term] = len(term_gene_set) # number of genes of the term, descendants included

    roots = [n for n in dG.nodes if dG.in_degree(n) == 0] # nodes without incoming edges

    uG = dG.to_undirected() # Returns an undirected representation of the digraph
    connected_subG_list = list(nxacc.connected_components(uG)) # one set of terms per connected component

    # Verify the graph makes sense...
    print('There are', len(roots), 'roots:', roots[0])
    print('There are', len(dG.nodes()), 'terms')
    print('There are', len(connected_subG_list), 'connected components')
    if len(roots) > 1:
        print('There are more than 1 root of ontology. Please use only one root.')
        sys.exit(1)
    if len(connected_subG_list) > 1:
        print( 'There are more than connected components. Please connect them.')
        sys.exit(1)

    return dG, terms_pairs, genes_terms_pairs, term_direct_gene_map, term_size_map

SparseGO graph

Code
# Import SparseGO graph (to extract all nodes/terms)... 

# Load ontology: create the graph of connected GO terms
# (`onto` and `gene2id_mapping` are expected to come from an earlier cell)
dG, terms_pairs, genes_terms_pairs, term_direct_gene_map, term_size_map = load_ontology_extra_output(onto, gene2id_mapping)
#### All terms of the graph except GO:0008150, which the loader reports as the single root
sparseGO_terms = list(dG.nodes())
sparseGO_terms.remove("GO:0008150")
There are 15015 genes
There are 1 roots: GO:0008150
There are 4184 terms
There are 1 connected components

Full GO graph

Code
# Import full graph (to find parents)...
import obonet
#import networkx as nx
url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'
full_graph = obonet.read_obo(url) # read the ontology found at `url` into a graph
full_graph = full_graph.reverse() # reverse the direction of every edge; NOTE(review): presumably so that edges point from parent to child - confirm
[n for n in full_graph.nodes if full_graph.in_degree(n) == 0] # nodes without incoming edges: the graph contains the 3 roots (BP,MF,CC)
['GO:0003674', 'GO:0005575', 'GO:0008150']

Match terms!

Find all matching terms, i.e. terms that are part of both the SparseGO graph and the drug slim results. If an ascendant of a slim term is in the SparseGO graph, it is also added as a match.

Code
# Each model has DIFFERENT matches (the graph is different).
#
# For every drug, collect the SparseGO terms that match its annotated
# "GoProcess" slim terms. A match is stored as [relationship, term] where
# relationship 1 means the slim term itself is a SparseGO term and 2 means one
# of its ascendants (parent, grandparent, ...) in the full GO graph is.
DIRECT_MATCH = 1
ANCESTOR_MATCH = 2

sparseGO_term_set = set(sparseGO_terms)  # O(1) membership instead of scanning the list

compounds_GOterms_matches = {}
for drug, drug_df in compounds_GOterms.items():
    drug_slim_GOterms = set(drug_df.loc[drug_df[2] == "GoProcess"][0])  # only GO processes
    drug_matches = []  # directly matched terms plus matched ascendants

    for term in drug_slim_GOterms:  # e.g. term = 'GO:1902669'
        if term in sparseGO_term_set:
            drug_matches.append([DIRECT_MATCH, term])

        # Walk up the full GO graph one generation at a time. The first
        # generation (direct parents) is checked too; previously it was
        # replaced by the grandparents before being tested, so matches on
        # direct parents were lost. `visited` stops shared ascendants from
        # being expanded more than once.
        visited = set()
        frontier = [term]
        while frontier:
            # in_edges() with a list ignores nodes that are absent from the graph
            parents = {source for source, _ in full_graph.in_edges(frontier)} - visited
            visited |= parents
            frontier = sorted(parents)
            drug_matches.extend(
                [ANCESTOR_MATCH, parent_term]
                for parent_term in frontier
                if parent_term in sparseGO_term_set
            )

    # As before, a drug without any GoProcess term gets no entry at all.
    if drug_slim_GOterms:
        # remove duplicated [relationship, term] pairs once per drug
        compounds_GOterms_matches[drug] = (
            pd.DataFrame(drug_matches).drop_duplicates().values.tolist()
        )
    print(drug)
nintedanib
bicalutamide
onalespib
cediranib
vemurafenib + erlotinib
azd7762
decitabine
tivozanib
mk-2206
palbociclib
sns-032
osu-03012
bi 2536
gemcitabine
ixazomib
panobinostat
cimetidine
mirdametinib
tacrolimus
cp-724714
dinaciclib
fedratinib
retinol + navitoclax
lenalidomide
brivanib
tanespimycin + docetaxel (taxotere)
tanespimycin + gemcitabine
neratinib
selumetinib + gdc-0941
vinblastine
tosedostat
idelalisib
olaparib
venetoclax
pazopanib
amuvatinib
thalidomide
gsk1070916
navitoclax + alisertib
gdc-0941
unc0638 + selumetinib
vorinostat + schembl2671349
cabozantinib
tak-715
pevonedistat
cyclosporin a
zstk474
vemurafenib + crizotinib
prochlorperazine
bosutinib
tipifarnib
vinorelbine
gsk-690693
kw-2449
tg100-115
epothilone b
trametinib
kx2-391
pelitinib
xl765
bortezomib + cay10603
belinostat
pluripotin + navitoclax
motesanib
valdecoxib
veliparib
obatoclax + methanesulfonic acid
docetaxel
clofarabine
sunitinib
vemurafenib
vemurafenib + navitoclax
dabrafenib
pha-793887
bms-754807
sotrastaurin
gsk1059615
at7519
cudc-101
simvastatin
tandutinib
navitoclax + decitabine
etoposide
sgx-523
canertinib
alisertib
imatinib
raf265
azd4547
nvp-bez235
ibrutinib
tanespimycin
navitoclax
temsirolimus
osi-930
fingolimod
abiraterone
jq1 + navitoclax
valisone
bortezomib + snx-2112
selumetinib + mk-2206
selumetinib
paclitaxel
rucaparib
luminespib
midostaurin
refametinib
vemurafenib + selumetinib
navitoclax + gemcitabine
axitinib
tozasertib
navitoclax + selumetinib
unc0638 + navitoclax
regorafenib
pf-562271
bortezomib + nan
entinostat
roscovitine
galunisertib
trifluoperazine
lenvatinib
fulvestrant
cytarabine
ponatinib
obatoclax
tamoxifen
alectinib
etoposide + schembl2671349
doramapimod
y-39983
avagacestat
gsk2636771
jq1 + vorinostat
doxorubicin + navitoclax
turofexorate isopropyl
ruxolitinib
navitoclax + vorinostat
sorafenib
azd1480
ro4929097
gsk2126458
bemcentinib
sildenafil
vorinostat
afatinib
navitoclax + sorafenib
navitoclax + piperlongumine
pyrimethamine
mk-1775
bortezomib + tanespimycin
retinoic acid
linsitinib
masitinib
pf-3758309
retinol + selumetinib
nilotinib
vemurafenib + salermide
topotecan
azd6482
istradefylline
navitoclax + telomerase inhibitor ix
lestaurtinib
foretinib
cyt387
jq1 + selumetinib
ispinesib + methanesulfonic acid
selumetinib + piperlongumine
saracatinib
epz004777 + selumetinib
epz004777 + navitoclax
bexarotene
linifanib
vincristine
crizotinib
selumetinib + vorinostat
azd8055
gefitinib
erismodegib
erlotinib
ci-1040
vx-702
quizartinib
selumetinib + decitabine
bortezomib
lapatinib
doxorubicin
vismodegib
sitagliptin
px-12
vorapaxar
alpelisib
zibotentan
flavopiridol
tivantinib
decitabine + schembl2671349
tacedinaline
vandetanib
enzastaurin
methotrexate
5-fluorouracil
semagacestat
dasatinib
teniposide [usan]
docetaxel (taxotere)
nan + navitoclax(1)
nan + navitoclax(2)
osi-027;coc1=cc=cc2=cc(=c3c4=c(n=cnn4c(=n3)c5ccc(cc5)c(=o)o)n)n=c21
paclitaxel;cc1=c2[c@h](c(=o)[c@@]3([c@h](c[c@@h]4[c@]([c@h]3[c@@h]([c@@](c2(c)c)(c[c@@h]1oc(=o)[c@@h]([c@h](c5=cc=cc=c5)nc(=o)c6=cc=cc=c6)o)o)oc(=o)c7=cc=cc=c7)(co4)oc(=o)c)o)c)oc(=o)c
phloretin
gossypol
myricetin
piperlongumine
pyrazolanthrone
prima-1
blebbistatin
staurosporine
cerulenin
purmorphamine
bix-01294
mg-132
fgin-1-27
itraconazole
sch-79797
necrostatin-1
rita
pifithrin-mu
brefeldin a
l-685458
nutlin-3
pi-103
neopeltolide
azd7545
pluripotin
avrainvillamide
indisulam
piperlongumine + telomerase inhibitor ix
Code
# Keep only the drugs for which at least one match was found.
compounds_GOterms_matches = {
    drug: matches
    for drug, matches in compounds_GOterms_matches.items()
    if matches != []
}
Code
len(compounds_GOterms_matches)
230

SparseGO terms x drugSlim terms matrix

Code
attribution_data_all.columns = attribution_data_all.columns.str.lower() # in order to match the term
Code
attribution_data_all.head()
brd-k02251932-001-01-3 brd-k25737009-001-01-2 nintedanib bicalutamide n-[(2r,3s)-2-[[cyclopropylmethyl(methyl)amino]methyl]-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2h-1,5-benzoxazocin-8-yl]-1-methyl-4-imidazolesulfonamide pha-665752 n-cyclopropyl-3-[3-[[cyclopropyl(oxo)methyl]amino]-1h-indazol-6-yl]benzamide ki8751 ipa-3 fawugygebhaqbu-ppexnqrjsa-n ... ml031 semagacestat rita cdk9 inhibitor dasatinib bms-536924;cc1=cc(=cc2=c1nc(=c3c(=cc=nc3=o)nc[c@h](c4=cc(=cc=c4)cl)o)n2)n5ccocc5 schembl13741284 daporinad stf-31 narciclasine
GO_term
GO:0000012_1 -0.006564 -0.005680 0.003188 -0.005863 -0.003410 -0.002951 0.001118 0.002033 0.000799 -0.007842 ... -0.007256 -0.004271 -0.013783 -0.006253 0.002864 0.009604 -0.008099 -0.001475 -0.003698 -0.009866
GO:0000012_2 0.010029 0.011514 0.009892 0.012072 0.005788 0.012909 0.002316 0.009362 -0.011816 0.000166 ... 0.008918 -0.002449 0.017704 0.006732 0.002447 0.006485 0.003888 -0.000569 0.001628 0.017132
GO:0000012_3 0.008466 0.006840 -0.000027 0.006379 0.003082 -0.006110 -0.008877 -0.000347 -0.013084 0.000150 ... -0.006096 0.011308 0.012216 0.000997 0.011521 0.013800 0.002843 0.016328 0.021640 0.003536
GO:0000012_4 0.013018 0.007276 0.010128 0.008622 0.004795 0.006706 0.000874 0.005514 -0.003347 -0.000010 ... -0.003682 0.006544 0.010806 0.003346 0.017556 0.023130 0.001105 0.009710 0.016940 0.014787
GO:0000012_5 -0.007076 -0.006129 -0.007634 -0.003785 -0.004151 -0.007947 -0.008430 -0.006039 -0.002722 0.002163 ... 0.001821 -0.002346 -0.007831 -0.009368 -0.011118 -0.003408 -0.001760 0.003593 -0.000109 -0.020831

5 rows × 684 columns

Code
attribution_data_all.shape
(25098, 684)

Only keep drugs that have annotated GO terms

Code
attribution_data_annotated = attribution_data_all[list(compounds_GOterms_matches.keys())]
Code
attribution_data_annotated.shape # 230 DRUGS
(25098, 230)

Build drugSlim (MoA) matrix

Code
# All-zero matrix with the same rows and columns as the annotated attribution
# matrix. Built directly instead of zeroing a copy through `.values[:]`, which
# only works while `.values` is a writable view of the column (not the case
# with pandas copy-on-write).
slim_matrix = pd.DataFrame(
    0.0,
    index=attribution_data_annotated.index,
    columns=attribution_data_annotated.columns,
)
Code
# Mark, for every drug, the rows of all its matched GO terms with a 1.
for drug, drug_matches in compounds_GOterms_matches.items():
    # each match is [relationship, term]; only the term is needed here
    matched_terms = {term for _, term in drug_matches}

    # every term owns six rows in the matrix, suffixed "_1" ... "_6"
    matched_rows = [
        term + "_" + str(i)
        for term in matched_terms
        for i in range(1, 7)
    ]

    # single .loc assignment: the chained form slim_matrix[drug][rows] = 1
    # writes to a temporary and is not guaranteed to update slim_matrix
    slim_matrix.loc[matched_rows, drug] = 1  # 1 = term is annotated to drug

Build matrices to store logits, predictions and real values

Code
# One row per SparseGO term, one column per annotated drug.
# logits_matrix receives predict_proba outputs, so it is created as float;
# filling an integer frame with fractional values triggers dtype-upcast
# warnings (or errors) in recent pandas versions.
logits_matrix = pd.DataFrame(0.0, index=sparseGO_terms, columns=slim_matrix.columns)
# predicted class (0/1) of each drug for each term
preds_matrix = pd.DataFrame(0, index=sparseGO_terms, columns=slim_matrix.columns)
# real annotation (0/1) of each drug for each term
slim_matrix_single_neuron = pd.DataFrame(0, index=sparseGO_terms, columns=slim_matrix.columns)

Create models

Regression models…

Code
# The recorded run of this cell stopped with
# "NameError: name 'train_test_split' is not defined", so the scikit-learn
# names used by the cell itself are imported here explicitly.
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Dictionaries to store results (one entry per GO term that gets a model)
GO_terms_auc_log = {}
GO_terms_aupr_log = {}
GO_terms_precision_log = {}

# 4-fold cross-validated logistic regression, one model per GO term
for goterm in sparseGO_terms:
    # annotation (0/1) of every drug for this term, read from its "_1" row
    goterm_drugs = slim_matrix.loc[[goterm+"_"+str(1)]].values.flatten()

    # skip terms annotated to 8 drugs or fewer
    if sum(goterm_drugs) <= 8:
        continue

    # the six rows "<term>_1" ... "<term>_6" are the features of the model
    list_nodes = [goterm+"_"+str(i) for i in range(1, 7)]

    score = attribution_data_annotated.loc[list_nodes].T
    # Scale every feature by its standard deviation (this has a large effect
    # on the results). NOTE(review): the std is computed over all annotated
    # drugs, including the ones later held out for testing.
    score_mod = score.divide(score.std()).fillna(0)

    # Separate drugs in 4 groups for cross-validation -----
    # Two rounds of stratified halving so that every group contains both classes.
    X_part1, X_part2, y_part1, y_part2 = train_test_split(
        score_mod, goterm_drugs, test_size=0.50, random_state=0, stratify=goterm_drugs)
    X_group1, X_group2, y_group1, y_group2 = train_test_split(
        X_part1, y_part1, test_size=0.50, random_state=0, stratify=y_part1)
    X_group3, X_group4, y_group3, y_group4 = train_test_split(
        X_part2, y_part2, test_size=0.50, random_state=0, stratify=y_part2)

    # Plain lists replace the former globals()["X_group" + n] lookups.
    X_groups = [X_group1, X_group2, X_group3, X_group4]
    y_groups = [y_group1, y_group2, y_group3, y_group4]

    # results of each cross-validation fold
    all_y_test = []
    all_y_pred_proba = []
    all_y_pred = []
    all_y_names = []

    for test_fold in range(len(X_groups)):
        X_test = X_groups[test_fold]
        y_test = y_groups[test_fold]

        # Use the other 3 groups for training (kept in ascending group order)
        X_train = pd.concat([X_groups[k] for k in range(len(X_groups)) if k != test_fold])
        y_train = np.concatenate([y_groups[k] for k in range(len(y_groups)) if k != test_fold])

        logreg = LogisticRegression(penalty="l2",solver="liblinear",max_iter=2000, C=10e-2,class_weight="balanced")

        # fit the model with data
        logreg.fit(X_train, y_train)
        y_pred = logreg.predict(X_test)
        # probability of the positive class for the held-out drugs
        y_pred_proba = logreg.predict_proba(X_test)[::,1]

        all_y_test.append(y_test)
        all_y_pred_proba.append(y_pred_proba)
        all_y_pred.append(y_pred)
        all_y_names.append(X_test.index)

    all_y_test = np.concatenate(all_y_test)
    all_y_pred_proba = np.concatenate(all_y_pred_proba)
    all_y_names = np.concatenate(all_y_names)
    all_y_pred = np.concatenate(all_y_pred)

    # store the held-out results of every drug in the term's row
    logits_matrix.loc[goterm,all_y_names] = all_y_pred_proba
    slim_matrix_single_neuron.loc[goterm,all_y_names] = all_y_test
    preds_matrix.loc[goterm,all_y_names] = all_y_pred

    GO_terms_auc_log[goterm] = metrics.roc_auc_score(all_y_test, all_y_pred_proba)

    precision, recall, thresholds = metrics.precision_recall_curve(all_y_test, all_y_pred_proba)
    GO_terms_aupr_log[goterm] = metrics.auc(recall, precision)
    GO_terms_precision_log[goterm] = metrics.precision_score(all_y_test, all_y_pred)
NameError: name 'train_test_split' is not defined
  • class_weight=“balanced”… mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as n_samples / (n_classes * np.bincount(y))

  • C , default=1.0… Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values specify stronger regularization.

  • solver{‘newton-cg’, ‘lbfgs’, ‘liblinear’, ‘sag’, ‘saga’}

  • Los resultados son iguales si uso predict_proba o decision_function, solo a la hora de interpretar predicted_proba si me da un porcentaje y decision_function una distancia a la recta, el AUC me sale exactamente igual en la regresion logistica

Code
# Cross-validated AUC of every GO term model as a frame indexed by term,
# without undefined values; the last line displays it from best to worst.
GO_terms_auc_log_df = (
    pd.DataFrame(list(GO_terms_auc_log.items()), columns=['goterm', 'auc'])
    .set_index("goterm")
    .dropna()
)
GO_terms_auc_log_df.sort_values(by=["auc"], ascending=False)
Code
print("There are " +str(len(GO_terms_auc_log_df))+ " logistic regression models.")
Code
# Restrict the three result matrices to the GO terms that have a model.
modelled_terms = list(GO_terms_auc_log_df.index)
logits_matrix = logits_matrix.loc[modelled_terms, :]
slim_matrix_single_neuron = slim_matrix_single_neuron.loc[modelled_terms, :]
preds_matrix = preds_matrix.loc[modelled_terms, :]

AUC histogram

Code
sns.set(rc={'figure.figsize':(10,6)})
fig, ax = plt.subplots()
perc = str(round((100*len(GO_terms_auc_log_df[GO_terms_auc_log_df["auc"]>0.69])/len(GO_terms_auc_log_df)),2))+"%"
N, bins, patches = plt.hist(GO_terms_auc_log_df, color=CB_color_cycle[6],bins=50, linewidth=0.1)

for i in range(0,len(bins)-1):
    if bins[i]>0.69:
        patches[i].set_facecolor(CB_color_cycle[2])

plt.yticks(fontsize=16)
plt.xticks(fontsize=16)

ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_color('#DDDDDD')

# Second, remove the ticks as well.
ax.tick_params(bottom=False, left=True)

# Third, add a horizontal grid (but keep the vertical grid hidden).
# Color the lines a light gray as well.
ax.set_axisbelow(True)
ax.yaxis.grid(True, color='#EEEEEE')
ax.xaxis.grid(False)

plt.xlabel("AUC value", fontsize=20)
plt.ylabel("Number of GO term models", fontsize=20)
colors2 = {'GO term models with AUC>=0.7':CB_color_cycle[2]}  
labels = list(colors2.keys())
handles = [plt.Rectangle((0,0),1,1, color=colors2[label]) for label in labels]
plt.legend(handles, labels,fontsize=20, loc="lower left", bbox_to_anchor=(0.35,-0.35))
plt.text(0.71, 8, str(perc), fontsize=20,color='#333333')
plt.title("Overall performance of the models using expression", fontsize=24)
# con el que mejor funciona es con la suma normal del attribution 
fig.tight_layout()
fig.savefig(resultsdir+'modelsAUClog.png', transparent=True)

AUC boxplot by parents

Code
# Add, for every modelled GO term, its level (level_number minus one) and its
# number of parents (incoming edges in the SparseGO graph).
auc_terms = list(GO_terms_auc_log_df.index)

levels = pd.DataFrame.from_dict(
    {term: level_number[term]-1 for term in auc_terms}, orient='index')
number_parents = pd.DataFrame.from_dict(
    {term: len(dG.in_edges(term)) for term in auc_terms}, orient='index')

# Start from the "auc" column only, so running the cell a second time rebuilds
# the two extra columns instead of failing on the three-name assignment below.
GO_terms_auc_log_df = pd.concat([GO_terms_auc_log_df[["auc"]], levels, number_parents], axis=1)
GO_terms_auc_log_df.columns = ["auc","levels","parents"]
Code
sns.set(rc={'figure.figsize':(10,6)})
fig, ax = plt.subplots()
ax = sns.boxplot(x="levels", y="auc", data=GO_terms_auc_log_df)

plt.yticks(fontsize=16)
plt.xticks(fontsize=16)

ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_color('#DDDDDD')

# Second, remove the ticks as well.
ax.tick_params(bottom=False, left=True)

# Third, add a horizontal grid (but keep the vertical grid hidden).
# Color the lines a light gray as well.
ax.set_axisbelow(True)
ax.yaxis.grid(True, color='#EEEEEE')
ax.xaxis.grid(False)

plt.xlabel("Level number", fontsize=20)
plt.ylabel("AUC value", fontsize=20)
plt.title("AUC value per level of GO hierarchy", fontsize=24)
# con el que mejor funciona es con la suma normal del attribution 
fig.tight_layout()
fig.savefig(resultsdir+'AUCbyLevelslog.png', transparent=True)
Code
GO_terms_auc_log_df.head()

import pyreadr pyreadr.write_rdata(“C:/Users/ksada/OneDrive - Tecnun/SparseGO_Rdata/cv_allsamples_mutations/”+“GO_terms_auc_logarithm.RData”, GO_terms_auc_log_df.reset_index(), df_name=“GO_terms_auc_logarithm”)

TOP 15 PREDICTED GO TERMS

Code
top15goterms= np.array(GO_terms_auc_log_df.sort_values(by=["auc"], ascending=False)[0:15].index)

Get Top GO term names

Code
# Rows of real_go_info are keyed by "<term>_1", so look the top terms up with
# that suffix and strip it again afterwards.
top15goterms_1 = [goterm+"_"+str(1) for goterm in top15goterms]

# .copy() makes the selection an independent frame, so the column rewrite
# below does not write into a slice of real_go_info
real_go_info_mod_best = real_go_info[real_go_info.GO_term.isin(top15goterms_1)].copy()
# remove only the trailing "_1"
real_go_info_mod_best["GO_term"] = real_go_info_mod_best["GO_term"].str.replace(r"_1$", "", regex=True)
Code
top15goterms_auc = GO_terms_auc_log_df.sort_values(by=["auc"], ascending=False)[0:15].reset_index()
top15goterms_auc.columns=["GO_term","auc","levels","parents"]
Code
top15goterms_auc.merge(real_go_info_mod_best[real_go_info_mod_best["GO_term"].isin(top15goterms)], on="GO_term")

WORST 15 PREDICTED GO TERMS

Code
worst15goterms= np.array(GO_terms_auc_log_df.sort_values(by=["auc"], ascending=True)[0:15].index)

Get Worst GO term names

Code
# Rows of real_go_info are keyed by "<term>_1", so look the worst terms up
# with that suffix and strip it again afterwards.
worst15goterms_1 = [goterm+"_"+str(1) for goterm in worst15goterms]

# .copy() makes the selection an independent frame, so the column rewrite
# below does not write into a slice of real_go_info
real_go_info_mod_worst = real_go_info[real_go_info.GO_term.isin(worst15goterms_1)].copy()
# remove only the trailing "_1"
real_go_info_mod_worst["GO_term"] = real_go_info_mod_worst["GO_term"].str.replace(r"_1$", "", regex=True)
Code
worst15goterms_auc = GO_terms_auc_log_df.sort_values(by=["auc"], ascending=True)[0:15].reset_index()
worst15goterms_auc.columns=["GO_term","auc","levels","parents"]
Code
worst15goterms_auc.merge(real_go_info_mod_worst[real_go_info_mod_worst["GO_term"].isin(worst15goterms)], on="GO_term")

AUPR histogram

Code
GO_terms_aupr_log_df = pd.DataFrame(list(GO_terms_aupr_log.items()),columns = ['goterm','aupr']).set_index("goterm")
GO_terms_aupr_log_df = GO_terms_aupr_log_df.dropna()
GO_terms_aupr_log_df.sort_values(by=["aupr"], ascending=False).head()
Code
# TENGO PROBLEMA CON EL RECALL 
sns.set(rc={'figure.figsize':(5,3)})
perc = str(round((100*len(GO_terms_aupr_log_df[GO_terms_aupr_log_df["aupr"]>0.69])/len(GO_terms_aupr_log_df)),2))+"%"
N, bins, patches = plt.hist(GO_terms_aupr_log_df, color=CB_color_cycle[6],bins=50, linewidth=0.1)
for i in range(0,len(bins)-1):
    if bins[i]>0.69:
        patches[i].set_facecolor(CB_color_cycle[3])

plt.xlabel("AUPR", fontsize=16)  
plt.title(perc, fontsize=16)

Precision histogram

Code
GO_terms_precision_log_df = pd.DataFrame(list(GO_terms_precision_log.items()),columns = ['goterm','precision']).set_index("goterm")
GO_terms_precision_log_df = GO_terms_precision_log_df.dropna()
GO_terms_precision_log_df.sort_values(by=["precision"], ascending=False).head()
Code
perc = str(round((100*len(GO_terms_precision_log_df[GO_terms_precision_log_df["precision"]>0.69])/len(GO_terms_precision_log_df)),2))+"%"
N, bins, patches = plt.hist(GO_terms_precision_log_df, color=CB_color_cycle[6],bins=50, linewidth=0.1)

for i in range(0,len(bins)-1):
    if bins[i]>0.69:
        patches[i].set_facecolor(CB_color_cycle[4])

plt.xlabel("Precision", fontsize=16)  
plt.title(perc, fontsize=16)

Example prediction

Code
def f2(goterm):
    """Return the chosen GO term unchanged (callback for the GO term combobox)."""
    selected = goterm
    return selected
Code
combobox_go = interactive(f2, goterm=widgets.Combobox(options=list(GO_terms_auc_log_df.sort_values(by=["auc"], ascending=False).index)))

Choose GO term to study…

Code
display(combobox_go)
Code
selected_go = combobox_go.result
Code
#auc
fpr, tpr, _ = metrics.roc_curve(slim_matrix_single_neuron.loc[selected_go],  logits_matrix.loc[selected_go])
auc = metrics.roc_auc_score(slim_matrix_single_neuron.loc[selected_go],  logits_matrix.loc[selected_go])
plt.plot(fpr,tpr,label="data 1, auc="+str(auc))
plt.legend(loc=4)
plt.show()

plot = pd.concat([pd.DataFrame(slim_matrix_single_neuron.loc[selected_go]),pd.DataFrame(logits_matrix.loc[selected_go])], axis=1)
plot.columns = ["slim","probability"]
ax = sns.boxplot(x="slim", y="probability", data=plot,showfliers=False )
Code
# Confusion matrix and threshold-based metrics of the selected GO term,
# comparing the real annotations with the cross-validated class predictions.
go_truth = slim_matrix_single_neuron.loc[selected_go]
go_preds = preds_matrix.loc[selected_go]

metrics.ConfusionMatrixDisplay.from_predictions(go_truth, go_preds)
# Hide the grid explicitly. The former `plt.grid(b=None)` used the `b`
# keyword, which Matplotlib renamed to `visible` and no longer accepts.
plt.grid(False)
print("Accuracy:",metrics.accuracy_score(go_truth, go_preds))
print("Precision:",metrics.precision_score(go_truth, go_preds))
print("Recall:",metrics.recall_score(go_truth, go_preds)) # TP / (TP+FN)

TN - FP

FN - TP

Code
precision, recall, thresholds = metrics.precision_recall_curve(slim_matrix_single_neuron.loc[selected_go],  logits_matrix.loc[selected_go])
auc_precision_recall = metrics.auc(recall, precision)
plt.plot(recall, precision,label=str(auc_precision_recall))
plt.legend(loc=4)
plt.show()

METRICS drugs

Code
# AUC and AUPR per drug, computed over all modelled GO terms (column-wise).
auc_drugs = {}
aupr_drugs = {}
for drug in slim_matrix_single_neuron.columns:
    drug_truth = slim_matrix_single_neuron.loc[:,drug]
    # drugs without any annotated term among the modelled ones are skipped
    if drug_truth.sum() ==0:
        continue
    drug_scores = logits_matrix.loc[:,drug]
    auc_drugs[drug] = metrics.roc_auc_score(drug_truth, drug_scores)
    precision, recall, thresholds = metrics.precision_recall_curve(drug_truth, drug_scores)
    aupr_drugs[drug] = metrics.auc(recall, precision)

# The index column keeps the name 'goterm' although it holds drug names.
auc_drugs_df = (
    pd.DataFrame(list(auc_drugs.items()), columns=['goterm', 'auc'])
    .set_index("goterm")
    .dropna()
)

aupr_drugs_df = (
    pd.DataFrame(list(aupr_drugs.items()), columns=['goterm', 'aupr'])
    .set_index("goterm")
    .dropna()
)

AUC histogram drugs

Code
sns.set(rc={'figure.figsize':(10,6)})
fig, ax = plt.subplots()
perc = str(round((100*len(auc_drugs_df[auc_drugs_df["auc"]>0.69])/len(auc_drugs_df)),2))+"%"
N, bins, patches = plt.hist(auc_drugs_df, color=CB_color_cycle[6],bins=50, linewidth=0.1)

for i in range(0,len(bins)-1):
    if bins[i]>0.69:
        patches[i].set_facecolor(CB_color_cycle[5])

plt.yticks(fontsize=16)
plt.xticks(fontsize=16)

ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_color('#DDDDDD')

# Second, remove the ticks as well.
ax.tick_params(bottom=False, left=True)

# Third, add a horizontal grid (but keep the vertical grid hidden).
# Color the lines a light gray as well.
ax.set_axisbelow(True)
ax.yaxis.grid(True, color='#EEEEEE')
ax.xaxis.grid(False)

plt.xlabel("AUC value", fontsize=20)
plt.ylabel("Number of drugs", fontsize=20)
colors2 = {'Drugs with AUC>=0.7':CB_color_cycle[5]}  
labels = list(colors2.keys())
handles = [plt.Rectangle((0,0),1,1, color=colors2[label]) for label in labels]
plt.legend(handles, labels,fontsize=20, loc="lower left", bbox_to_anchor=(0.35,-0.35))
plt.text(0.79, 6, str(perc), fontsize=20,color='#333333')
plt.title("Overall performance by drugs using mutations", fontsize=24)
# con el que mejor funciona es con la suma normal del attribution 
fig.tight_layout()
fig.savefig(resultsdir+'drugsAUClog.png', transparent=True)
Code
auc_drugs_df.sort_values(by=["auc"], ascending=False)

AUPR histogram drugs

Code
sns.set(rc={'figure.figsize':(5,3)})
perc = str(round((100*len(aupr_drugs_df[aupr_drugs_df["aupr"]>0.69])/len(aupr_drugs_df)),2))+"%"
N, bins, patches = plt.hist(aupr_drugs_df, color=CB_color_cycle[6],bins=50, linewidth=0.1)
for i in range(0,len(bins)-1):
    if bins[i]>0.69:
        patches[i].set_facecolor(CB_color_cycle[3])

plt.xlabel("AUPR drugs", fontsize=16)  
plt.title(perc, fontsize=16)
Code
auc_drugs_df.sort_values(by=["auc"], ascending=False)

Example drug prediction

Code
def f(drug):
    """Return the chosen drug unchanged (callback for the drug comboboxes)."""
    selected = drug
    return selected
Code
combobox = interactive(f, drug=widgets.Combobox(options=list(auc_drugs_df.sort_values(by=["auc"], ascending=False).index)))

Choose drug to study…

Code
display(combobox)
Code
selected_drug_name = combobox.result
Code
sns.set(rc={'figure.figsize':(4,2)})
#auc
fpr, tpr, _ = metrics.roc_curve(slim_matrix_single_neuron.loc[:,selected_drug_name], logits_matrix.loc[:,selected_drug_name] )
auc = metrics.roc_auc_score(slim_matrix_single_neuron.loc[:,selected_drug_name],  logits_matrix.loc[:,selected_drug_name])
plt.plot(fpr,tpr,label="data 1, auc="+str(auc))
plt.legend(loc=4)
plt.show()

plot = pd.concat([pd.DataFrame(slim_matrix_single_neuron.loc[:,selected_drug_name]),pd.DataFrame(logits_matrix.loc[:,selected_drug_name])], axis=1)
plot.columns = ["slim","probability"]
ax = sns.boxplot(x="slim", y="probability", data=plot,showfliers=False )
Code
sum(slim_matrix_single_neuron.loc[:,selected_drug_name])
Code
plot = pd.concat([pd.DataFrame(slim_matrix.loc[:,selected_drug_name]),pd.DataFrame(attribution_data_all.loc[:,selected_drug_name]*1e4)], axis=1)
plot.columns = ["slim","attribution"]
ax = sns.boxplot(x="slim", y="attribution", data=plot,showfliers=True )
Code
# Confusion matrix and threshold-based metrics of the selected drug,
# comparing the real annotations with the cross-validated class predictions.
drug_truth = slim_matrix_single_neuron.loc[:,selected_drug_name]
drug_preds = preds_matrix.loc[:,selected_drug_name]

metrics.ConfusionMatrixDisplay.from_predictions(drug_truth, drug_preds)
# Hide the grid explicitly; `visible=None` without other arguments only
# toggles the current state.
plt.grid(False)
print("Accuracy:",metrics.accuracy_score(drug_truth, drug_preds))
print("Precision:",metrics.precision_score(drug_truth, drug_preds))
print("Recall:",metrics.recall_score(drug_truth, drug_preds)) # TP / (TP+FN)
print("AUC with score:",auc) # ROC AUC computed from the probabilities in the ROC cell

View drug’s top functions…

Code
# "<term>_1" label of every GO term that has a cross-validated model.
predictions_nodes = [goterm+"_"+str(1) for goterm in logits_matrix.index]
Code
# add names to go terms
# .copy() makes the selection an independent frame, so the column rewrite
# below does not write into a slice of real_go_info
real_go_info_log = real_go_info[real_go_info.GO_term.isin(predictions_nodes)].copy()
# remove only the trailing "_1"
real_go_info_log["GO_term"] = real_go_info_log["GO_term"].str.replace(r"_1$", "", regex=True)
Code
# LOS LOGITS DE TEST!!
test_drug_logs = pd.DataFrame(logits_matrix.loc[:,selected_drug_name]).reset_index()
test_drug_logs.columns  = ["GO_term","probability"]
test_drug_logs = test_drug_logs.merge(real_go_info_log, on="GO_term")
test_drug_logs.sort_values(by=["probability"], ascending=False)
Code
sns.set(rc={'figure.figsize':(15,8)})
ax = sns.boxplot(x="layer_number", y="probability", data=test_drug_logs, order=[7,6,5,4,3,2,1,0],showfliers=False)
ax = ax.set(xlabel='General terms                                                    -                                                          Specific terms')

Final model

Once the models have been cross-validated we create the final models using all samples…

Code
# Final models: one logistic regression per GO term, fitted on ALL annotated
# drugs. The metrics below are therefore measured on the training data itself.
GO_terms_auc_log_final = {}
GO_terms_aupr_log_final = {}
GO_terms_precision_log_final = {}
models_log = {}

for goterm in sparseGO_terms:
    # annotation (0/1) of every drug for this term, read from its "_1" row
    goterm_drugs = slim_matrix.loc[[goterm+"_"+str(1)]].values.flatten()

    # only terms annotated to more than 10 drugs get a final model
    if sum(goterm_drugs) <= 10:
        continue

    # features: the six rows "<term>_1" ... "<term>_6", scaled by their std
    list_nodes = [goterm+"_"+str(i) for i in range(1, 7)]
    score = attribution_data_annotated.loc[list_nodes].T
    score_mod = score.divide(score.std()).fillna(0)

    # fit and evaluate on the same samples
    logreg = LogisticRegression(penalty="l2",solver="liblinear",max_iter=2000, C=10e-2,class_weight="balanced")
    logreg.fit(score_mod, goterm_drugs)
    y_pred = logreg.predict(score_mod)
    y_pred_proba = logreg.predict_proba(score_mod)[:, 1]

    GO_terms_auc_log_final[goterm] = metrics.roc_auc_score(goterm_drugs, y_pred_proba)

    precision, recall, thresholds = metrics.precision_recall_curve(goterm_drugs, y_pred_proba)
    GO_terms_aupr_log_final[goterm] = metrics.auc(recall, precision)
    # despite the dictionary's name, the stored value is the recall
    GO_terms_precision_log_final[goterm] = metrics.recall_score(goterm_drugs, y_pred)
    models_log[goterm] = logreg
Code
len(models_log)

Final model AUC

Code
GO_terms_auc_log_df_final = pd.DataFrame(list(GO_terms_auc_log_final.items()),columns = ['goterm','auc']).set_index("goterm")
GO_terms_auc_log_df_final = GO_terms_auc_log_df_final.dropna()
GO_terms_auc_log_df_final.sort_values(by=["auc"], ascending=False)
Code
sns.set(rc={'figure.figsize':(6,4)})
perc = str(round((100*len(GO_terms_auc_log_df_final[GO_terms_auc_log_df_final["auc"]>0.7])/len(GO_terms_auc_log_df_final)),2))+"%"
N, bins, patches = plt.hist(GO_terms_auc_log_df_final, color=CB_color_cycle[6],bins=50, linewidth=0.1)

for i in range(0,len(bins)-1):
    if bins[i]>0.7:
        patches[i].set_facecolor(CB_color_cycle[2])

plt.xlabel("AUC (logistic 1)", fontsize=16)  
plt.title(perc, fontsize=16)
# con el que mejor funciona es con la suma normal del attribution 

Final model AUPR

Code
GO_terms_aupr_log_df_final = pd.DataFrame(list(GO_terms_aupr_log_final.items()),columns = ['goterm','aupr']).set_index("goterm")
GO_terms_aupr_log_df_final = GO_terms_aupr_log_df_final.dropna()
GO_terms_aupr_log_df_final.sort_values(by=["aupr"], ascending=False).head()
Code
# TENGO PROBLEMA CON EL RECALL 
sns.set(rc={'figure.figsize':(5,3)})
perc = str(round((100*len(GO_terms_aupr_log_df_final[GO_terms_aupr_log_df_final["aupr"]>0.7])/len(GO_terms_aupr_log_df_final)),2))+"%"
N, bins, patches = plt.hist(GO_terms_aupr_log_df_final, color=CB_color_cycle[6],bins=50, linewidth=0.1)
for i in range(0,len(bins)-1):
    if bins[i]>0.7:
        patches[i].set_facecolor(CB_color_cycle[3])

plt.xlabel("AUPR", fontsize=16)  
plt.title(perc, fontsize=16)

Final model Recall

Code
GO_terms_precision_log_df_final = pd.DataFrame(list(GO_terms_precision_log_final.items()),columns = ['goterm','precision']).set_index("goterm")
GO_terms_precision_log_df_final = GO_terms_precision_log_df_final.dropna()
GO_terms_precision_log_df_final.sort_values(by=["precision"], ascending=False).head()
Code
perc = str(round((100*len(GO_terms_precision_log_df_final[GO_terms_precision_log_df_final["precision"]>0.69])/len(GO_terms_precision_log_df_final)),2))+"%"
N, bins, patches = plt.hist(GO_terms_precision_log_df_final, color=CB_color_cycle[6],bins=50, linewidth=0.1)

for i in range(0,len(bins)-1):
    if bins[i]>0.69:
        patches[i].set_facecolor(CB_color_cycle[4])

plt.xlabel("Recall", fontsize=16)  
plt.title(perc, fontsize=16)

Predict for a new drug

Make predictions

Code
unknown = list(set(attribution_data_all.columns)-set(attribution_data_annotated.columns))

Get the probabilities for all unknown drugs

Code
# Probability of every modelled GO term for every drug without annotations.
# Result: probabilities_unknown, rows = GO terms, columns = unknown drugs.
predictions = {}
probabilities = {}

# Per-model inputs that do not depend on the drug are computed once:
# the feature (row) names each model was fitted on, and the std of those rows
# over the drugs that trained the models (used to scale new drugs the same way).
model_nodes = {}
model_std = {}
for goterm, model in models_log.items():
    nodes = list(model.feature_names_in_)
    model_nodes[goterm] = nodes
    model_std[goterm] = attribution_data_annotated.loc[nodes].T.std()

drug_columns = []
for drug in unknown:
    for goterm, model in models_log.items():
        list_nodes = model_nodes[goterm]

        # one-row frame holding this drug's attributions for the model's features
        score = attribution_data_all.loc[list_nodes][drug].to_frame().T
        score_mod = score.divide(model_std[goterm]).fillna(0)

        # probability of the positive class
        probabilities[goterm] = model.predict_proba(score_mod)[::,1]

    # build the drug's column once all terms are predicted (it used to be
    # rebuilt after every single term)
    drug_probs = pd.DataFrame.from_dict(probabilities).T
    drug_probs.columns = [drug]
    drug_columns.append(drug_probs)
    print(drug)

# a single concat instead of growing the frame drug by drug
probabilities_unknown = pd.concat(drug_columns, axis=1) if drug_columns else pd.DataFrame()
Code
# Save file
with open(resultsdir+'probabilities_unknown_MG2_log_sum.pkl', 'wb') as dictionary_file:
  pickle.dump(probabilities_unknown, dictionary_file)

IMPORT file

Code
# To import dataframe created before
with open(resultsdir+'probabilities_unknown_MG2_log_sum.pkl', 'rb') as dictionary_file:
    probabilities_unknown = pickle.load(dictionary_file)  

import pyreadr
pyreadr.write_rdata("C:/Users/ksada/OneDrive - Tecnun/SparseGO_Rdata/cv_allsamples_mutations/" + "probabilities_known.RData", probabilities_unknown.reset_index(), df_name="probabilities_known")

Study drug with unknown MOA

Choose drug with unknown MOA…

Code
# Combobox offering the drugs listed in `unknown`; `f` is the callback invoked
# with the chosen drug (it is defined outside this section of the notebook).
combobox_u = interactive(f, drug=widgets.Combobox(options=unknown))
Code
# Render the widget in the notebook
display(combobox_u)
Code
# Value returned by `f` for the current selection (presumably the drug name; verify against `f`)
selected_drug_u_name = combobox_u.result
Code
# Probabilities of the selected drug as a two-column table (GO term, probability)
selected_column = probabilities_unknown.loc[:, selected_drug_u_name]
probabilities_df = selected_column.to_frame().reset_index()
probabilities_df.columns = ["GO_term", "probability"]

# Attach the columns of real_go_info_log, matching rows on "GO_term"
probabilities_df = pd.merge(probabilities_df, real_go_info_log, on="GO_term")

# Show the GO terms from most to least probable
probabilities_df.sort_values(by="probability", ascending=False)
Code
# Distribution of the predicted probabilities per layer, drawn from layer 7 down to layer 0
sns.set(rc={'figure.figsize': (15, 8)})
layer_order = list(range(7, -1, -1))
ax = sns.boxplot(
    data=probabilities_df,
    x="layer_number",
    y="probability",
    order=layer_order,
    showfliers=False,
)
ax = ax.set(xlabel='General terms                                                    -                                                          Specific terms')
  • bendamustine –> buen ejemplo, parece que tiene sentido lo que sale, es un farmaco para la leucemia y salen cosas de la sangre https://pubchem.ncbi.nlm.nih.gov/compound/65628#section=Chemical-Vendors
  • temozolomide –> es para Glioblastoma y me sale brain de los más altos y algo de calcium que tiene algo que ver
  • Bleomycin –>

Look for the GO terms (neurons) with the highest variance between drugs…

Code
# Attribution
topvariance = list(attribution_data_all.var(axis=1).sort_values(axis=0, ascending=False)[0:100].index.values)
data_array = attribution_data_all.loc[topvariance].T

Or cluster by the probability…

Code
# Probabilities
topvariance = list(probabilities_unknown.var(axis=1).sort_values(axis=0, ascending=False)[0:200].index.values)
data_array = probabilities_unknown.loc[topvariance].T
Code
import plotly.graph_objects as go
import plotly.figure_factory as ff
from scipy.spatial.distance import pdist, squareform

# Clustered heatmap of data_array: a dendrogram of the columns on top, a
# dendrogram of the rows on the left and the values as a heatmap in between.

# Labels of the columns and rows of the matrix
labelsGOterms = data_array.columns.to_numpy()
labelsDrugs = data_array.index.to_numpy()

# Upper dendrogram (clusters the columns); its traces use the secondary y axis
fig = ff.create_dendrogram(data_array.T, orientation='bottom', labels=labelsGOterms)
for trace in fig['data']:
    trace['yaxis'] = 'y2'

# Side dendrogram (clusters the rows); its traces use the secondary x axis
# and are added to the main figure
dendro_side = ff.create_dendrogram(data_array, orientation='right', labels=labelsDrugs)
for trace in dendro_side['data']:
    trace['xaxis'] = 'x2'
    fig.add_trace(trace)

# Heatmap with rows and columns reordered as the leaves of the two dendrograms
row_order = dendro_side['layout']['yaxis']['ticktext']
column_order = fig['layout']['xaxis']['ticktext']
heatmap = [
    go.Heatmap(
        x=column_order,
        y=row_order,
        z=data_array.loc[row_order, column_order],
        zmin=0,
        zmax=1,
    )
]

# Position the cells on the tick values used by the dendrograms
heatmap[0]['x'] = fig['layout']['xaxis']['tickvals']
heatmap[0]['y'] = dendro_side['layout']['yaxis']['tickvals']

for trace in heatmap:
    fig.add_trace(trace)

# The main y axis takes the ticks of the side dendrogram
fig['layout']['yaxis']['ticktext'] = dendro_side['layout']['yaxis']['ticktext']
fig['layout']['yaxis']['tickvals'] = np.asarray(dendro_side['layout']['yaxis']['tickvals'])

# Settings shared by the four axes: no frame, grid, zero line or tick marks
plain_axis = {'mirror': False,
              'showgrid': False,
              'showline': False,
              'zeroline': False,
              'ticks': ""}

fig.update_layout(
    width=800, height=1100,
    showlegend=False, hovermode='closest',
    # heatmap columns / upper dendrogram
    xaxis={'domain': [.15, 1], **plain_axis},
    # side dendrogram
    xaxis2={'domain': [0, .15], 'showticklabels': False, **plain_axis},
    # heatmap rows / side dendrogram
    yaxis={'domain': [0, .85], 'showticklabels': False, **plain_axis},
    # upper dendrogram
    yaxis2={'domain': [.825, .975], 'showticklabels': False, **plain_axis},
)

# Plot!
fig.show()

Dendrograms - Most commonly created as an output from hierarchical clustering. - The key to interpreting them is to focus on the height at which any two objects are joined together: the lower the link that joins two rows, the more similar they are. - They give an idea of the number of clusters (but cannot determine that number).

Code
from scipy.stats import ranksums 
# Results per row of slim_matrix (keys are its index labels)
GO_terms_wilcox, number_ones_w, sum_attribution, terms_all_genes = {}, {}, {}, {}

# Two-sided Wilcoxon rank-sum test per GO term: attribution of the drugs
# marked 1 in slim_matrix against the attribution of the drugs marked 0
for goterm in slim_matrix.index:
    # one row per drug: annotation flag and attribution score
    goterm_drugs = slim_matrix.loc[[goterm]].T.set_axis(["slim"], axis=1)
    score = attribution_data_all.loc[[goterm]].T.set_axis(["score"], axis=1)
    slim_score = goterm_drugs.join(score)

    number_ones_w[goterm] = sum(goterm_drugs.values.ravel())
    sum_attribution[goterm] = sum(score.values.ravel())
    # the last two characters of the label are dropped to look up the term
    # (presumably the "_<n>" neuron suffix; verify against term_size_map keys)
    terms_all_genes[goterm] = term_size_map[goterm[:-2]]

    is_one = slim_score["slim"] == 1
    is_zero = slim_score["slim"] == 0
    test = ranksums(slim_score.loc[is_one, "score"], slim_score.loc[is_zero, "score"])
    GO_terms_wilcox[goterm] = test.pvalue
Code
# Table of p-values indexed by GO term, without the terms whose test gave NaN
GO_terms_wilcox_df = (
    pd.DataFrame(list(GO_terms_wilcox.items()), columns=['goterm', 'score'])
    .set_index("goterm")
    .dropna()
)
# Show from most to least significant
GO_terms_wilcox_df.sort_values(by="score")
Code
# Display the table of p-values (unsorted)
GO_terms_wilcox_df

Percentage lower than 0.05…

Code
# Percentage of GO terms with p-value below 0.05, formatted as text (e.g. "12.34%")
perc = str(round((100*len(GO_terms_wilcox_df[GO_terms_wilcox_df["score"]<0.05])/len(GO_terms_wilcox_df)),2))+"%"
Code
# Small figure for the p-value histogram
sns.set(rc={'figure.figsize':(5,3)})
#sns.histplot(data=GO_terms_wilcox_df, x="score", kde=True, color="olive", bins=100).set(title='Wilcox GO terms - '+perc)
# `histogram` is the helper defined at the top of the notebook (it recolours the bins below 0.05)
histogram(GO_terms_wilcox_df["score"],CB_color_cycle[0],'Wilcox GO terms - '+perc,"Number of GO terms",n_bins=200)

Draw

Code
# Boxplot of the attribution of one GO-term neuron, split by the 0/1 flag in slim_matrix
goterm="GO:1903077_1"
# one row per drug with the annotation flag of this GO term
slim = slim_matrix.loc[[goterm]].T
slim.columns = ["slim"]
# one row per drug with the attribution of this neuron
score = attribution_data_annotated.loc[[goterm]].T
score.columns = ["score"]
# NOTE(review): the index is overwritten positionally, which assumes the columns of
# attribution_data_annotated are in the same order as those of slim_matrix - confirm
score.index = slim.index
plot = slim.join(score)
plot.columns = ["slim","score"]
ax = sns.boxplot(x="slim", y="score", data=plot )
Code
# Plotting the KDE Plot
sns.kdeplot(plot.loc[plot["slim"] == 1]["score"], color='orange', shade=True, label=1)
sns.kdeplot(plot.loc[plot["slim"] == 0]["score"], color='blue', shade=True, label=0)
plt.xlabel('Attribution')
plt.ylabel('Probability Density')
Code
# Sum of the "slim" column, i.e. how many drugs are flagged 1 for this GO term
slim.sum()
Code
# Number of rows in the plotted table
len(plot)

esta no tiene sentido si no es absoluto el valor (porque puede afectar o positiva o negativamente el attribution)

Code
from scipy.stats import ranksums 
Code
drugs_wilcox = {}

# For every row label of slim_matrix: number of incoming edges of its term in dG
# and the value stored in level_number minus one
number_parents = {}
levels = {}
for node in slim_matrix.index:
    term = node[:-2]  # label without its last two characters
    number_parents[node] = sum(1 for _ in dG.in_edges(term))
    levels[node] = level_number[term] - 1
levels = pd.DataFrame.from_dict(levels, orient='index')
number_parents = pd.DataFrame.from_dict(number_parents, orient='index')

# Two-sided Wilcoxon rank-sum test per drug: attribution of the GO terms
# marked 1 in slim_matrix against the attribution of those marked 0
for drug in slim_matrix.columns:
    slim_score = pd.concat([slim_matrix[drug], attribution_data_all[drug], levels], axis=1)
    slim_score.columns = ["slim", "score", "levels"]
    is_one = slim_score["slim"] == 1
    is_zero = slim_score["slim"] == 0
    test = ranksums(slim_score.loc[is_one, "score"], slim_score.loc[is_zero, "score"])
    drugs_wilcox[drug] = test.pvalue
    #drugs_wilcox[drug] = ranksums(slim_score.loc[slim_score["slim"] == 1]["score"], slim_score.loc[slim_score["slim"] == 0]["score"],alternative="greater").pvalue
Code
# Table of p-values, one row per drug (the index column keeps the name
# 'goterm' although it holds drug names), without the NaN results
drugs_wilcox_df = (
    pd.DataFrame(list(drugs_wilcox.items()), columns=['goterm', 'score'])
    .set_index("goterm")
    .dropna()
)
# Show from most to least significant
drugs_wilcox_df.sort_values(by="score")
Code
# Number of drugs with p-value below 0.05
len(drugs_wilcox_df[drugs_wilcox_df["score"]<0.05])
Code
# Number of drugs with a valid p-value
len(drugs_wilcox_df)
Code
# Percentage of drugs with p-value below 0.05, formatted as text
perc = str(round(100*(len(drugs_wilcox_df[drugs_wilcox_df["score"]<0.05])/len(drugs_wilcox_df)),2))+"%"
Code
sns.set(rc={'figure.figsize':(4,4)})
# `histogram` is the helper defined at the top of the notebook (it recolours the bins below 0.05)
histogram(drugs_wilcox_df["score"],CB_color_cycle[1],'Wilcox Drugs - '+perc,"Number of Drugs",n_bins=10)

Draw

Code
# Drug inspected in the following cells
drug="selumetinib"
Code
# One row per GO-term neuron: annotation flag, attribution, number of parents and level
plot = pd.concat([slim_matrix[drug], attribution_data_all[drug],number_parents,levels],axis=1)
plot.columns = ["slim","score","parents","levels"]
Code
# Attribution of the neurons flagged 1 vs flagged 0 for this drug
sns.set(rc={'figure.figsize':(4,4)})
ax = sns.boxplot(x="slim", y="score", data=plot,showfliers=True )
Code
# Two-sided Wilcoxon rank-sum p-value between the two groups
ranksums(plot.loc[plot["slim"] == 1]["score"], plot.loc[plot["slim"] == 0]["score"]).pvalue
Code
# Same comparison split by level, drawn from level 7 down to level 0
sns.set(rc={'figure.figsize':(15,8)})
ax = sns.boxplot(x="levels", y="score", hue="slim", data=plot, order=[7,6,5,4,3,2,1,0])
ax = ax.set(xlabel='General terms                                                    -                                                          Specific terms')
Code
from scipy.stats import combine_pvalues

# Wilcoxon rank-sum test inside each level (0 to 7) of the selected drug
vals = []
for i in range(8):
    plot_level = plot[plot["levels"] == i]
    scores_one = plot_level.loc[plot_level["slim"] == 1, "score"]
    scores_zero = plot_level.loc[plot_level["slim"] == 0, "score"]
    pvalue = ranksums(scores_one, scores_zero).pvalue
    vals.append(pvalue)
    print(f"P-value level {i}: {pvalue}")

# delete nans, some levels have only 1 class
cleanedvals = [x for x in vals if not np.isnan(x)]
# Combine the per-level p-values with Fisher's method
combine_pvalues(cleanedvals, method='fisher', weights=None)
Code
# Attribution split by number of parents, with one box per value of the "slim" flag
sns.set(rc={'figure.figsize':(20,8)})
ax = sns.boxplot(x="parents", y="score", hue="slim", data=plot)
Code
# Rows flagged 1 divided by 6 (presumably 6 neurons per GO term - TODO confirm)
len(plot.loc[plot["slim"] == 1])/6
Code
# Rows flagged 0 divided by 6 (same assumption as above)
len(plot.loc[plot["slim"] == 0])/6

Wilcox by layers and add by fisher

Code
from scipy.stats import ranksums, combine_pvalues

drugs_wilcox_levels = {}

# Per drug: one Wilcoxon rank-sum test inside each group of neurons that share
# the same number of parents (1 to 26), combined with Fisher's method
for drug in slim_matrix.columns:
    slim_score = pd.concat(
        [slim_matrix[drug], attribution_data_all[drug], number_parents, levels],
        axis=1,
    )
    slim_score.columns = ["slim", "score", "parents", "levels"]

    vals = []
    for i in range(1, 27):
        slim_score_level = slim_score[slim_score["parents"] == i]
        scores_one = slim_score_level.loc[slim_score_level["slim"] == 1, "score"]
        scores_zero = slim_score_level.loc[slim_score_level["slim"] == 0, "score"]
        pvalue = ranksums(scores_one, scores_zero).pvalue
        vals.append(pvalue)

    # delete nans, some groups have only 1 class
    cleanedvals = [x for x in vals if not np.isnan(x)]
    s, drugs_wilcox_levels[drug] = combine_pvalues(cleanedvals, method='fisher', weights=None)
    print(drug)
Code
# Table of the last drug processed by the loop above
slim_score
Code
# Combined p-value per drug (the index column keeps the name 'goterm' although it holds drug names)
# NOTE(review): the section heading mentions layers, but the loop that fills
# drugs_wilcox_levels stratifies by the "parents" column - confirm which one is intended
drugs_wilcox_levels_df = pd.DataFrame(list(drugs_wilcox_levels.items()),columns = ['goterm','score']).set_index("goterm")
drugs_wilcox_levels_df = drugs_wilcox_levels_df.dropna()
drugs_wilcox_levels_df.sort_values(by=["score"], ascending=True)
Code
sns.set(rc={'figure.figsize':(4,4)})
# Percentage of drugs with combined p-value below 0.05, formatted as text
perc = str(round(100*(len(drugs_wilcox_levels_df[drugs_wilcox_levels_df["score"]<0.05])/len(drugs_wilcox_levels_df)),2))+"%"
# `histogram` is the helper defined at the top of the notebook (it recolours the bins below 0.05)
histogram(drugs_wilcox_levels_df["score"],CB_color_cycle[1],'Wilcox Drugs - '+perc,"Number of Drugs",n_bins=10)
Code
# Number of drugs in slim_matrix
len(slim_matrix.columns)

SVM

Code
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
Code
from sklearn import svm
Code
# Result tables with one row per GO term and one column per drug; the rows of
# the GO terms that are not modelled keep their initial zeros.

# Labels and predicted classes (left integer-initialised, as before)
slim_matrix_single_neuron = pd.DataFrame(0, index=sparseGO_terms, columns=slim_matrix.columns)
preds_svm_matrix = pd.DataFrame(0, index=sparseGO_terms, columns=slim_matrix.columns)

# Probabilities, distances to the hyperplane and logit differences are floats:
# the tables are created as float so that filling them does not rely on the
# silent int -> float upcasting that recent pandas versions deprecate.
platt_matrix = pd.DataFrame(0.0, index=sparseGO_terms, columns=slim_matrix.columns)
distance_matrix = pd.DataFrame(0.0, index=sparseGO_terms, columns=slim_matrix.columns)
delta_logits_matrix = pd.DataFrame(0.0, index=sparseGO_terms, columns=slim_matrix.columns)

Create models

Regression models…

Code
# Dictionaries to store results
GO_terms_auc_svm = {}
GO_terms_aupr_svm = {}
GO_terms_precision_svm = {}
GO_terms_auc_delta_logits = {}

# Perform logistic
for goterm in sparseGO_terms:
    # if (real_go_info[real_go_info["GO_term"]==goterm+"_1"]["layer_number"]).values >3:
    #     continue
    
    # store results of each cross validation
    all_y_test = []
    all_y_pred_proba = []
    all_y_pred_proba_dis = []
    all_y_pred = []
    all_y_names = []

    goterm_drugs = slim_matrix.loc[[goterm+"_"+str(1)]].values.flatten()

    if sum(goterm_drugs) <= 8: # at least 2 annotated drugs in each group
            continue

    list_nodes = []
    for i in range(1,7):
        list_nodes.append(goterm+"_"+str(i))

    score = attribution_data_annotated.loc[list_nodes].T
    #score_mod = score
    score_mod = score.divide(score.std()).fillna(0) # AFECTA MUCHO

    # Separate drugs in 4 groups for cross-validation -----

    # Split data in 2 groups (with train_test_split in order to have 0s in both groups)
    X_part1,X_part2,y_part1,y_part2=train_test_split(score_mod,goterm_drugs,test_size=0.50,random_state=0,stratify=goterm_drugs)
    # Split data again in 4 groups (split data previously split)
    X_group1,X_group2,y_group1,y_group2=train_test_split(X_part1,y_part1,test_size=0.50,random_state=0,stratify=y_part1)
    X_group3,X_group4,y_group3,y_group4=train_test_split(X_part2,y_part2,test_size=0.50,random_state=0,stratify=y_part2)

    for i in  range(1,5):
        vector = range(0,5)
        group_number = str(i)
        X_test = globals()["X_group"+group_number]
        y_test = globals()["y_group"+group_number]

        # Use the other 3 groups for training 
        keep = list({1,2,3,4}-{int(group_number)}) # remove group number of current test 
        X_train = pd.concat((globals()["X_group"+str(keep[0])],globals()["X_group"+str(keep[1])],globals()["X_group"+str(keep[2])]))
        y_train = np.concatenate((globals()["y_group"+str(keep[0])],globals()["y_group"+str(keep[1])],globals()["y_group"+str(keep[2])]))
        
        #gamma = 1/(X_train.shape[1]*X_train.to_numpy().var())
        gamma = "scale"
        C=1
        
        svm_model = svm.SVC(C=C,gamma=gamma, kernel='rbf',
                           class_weight="balanced",
                            tol=0.001,
                            probability=True,
                            random_state=1234)
        # svm_model = svm.SVC(gamma='auto', kernel='rbf',class_weight="balanced",probability=True)
        
        # fit the model with data
        svm_model.fit(X_train,y_train)
        y_pred=svm_model.predict(X_test)
        y_pred_proba = svm_model.predict_proba(X_test)[::,1] # platt values
        y_pred_proba_dis = svm_model.decision_function(X_test) # An SVM returns a real-valued prediction for each of the input data samples, which corresponds to its distance from the separating hyperplane.
        #  decision_function SORTS the results from most probable class to the least probable one.
        
        all_y_test.append(y_test)
        all_y_pred_proba.append(y_pred_proba)
        all_y_pred_proba_dis.append(y_pred_proba_dis)
        all_y_pred.append(y_pred)
        all_y_names.append(X_test.index)

    all_y_test = np.concatenate(all_y_test)
    all_y_pred_proba = np.concatenate(all_y_pred_proba)
    all_y_pred_proba_dis = np.concatenate(all_y_pred_proba_dis)
    all_y_names = np.concatenate(all_y_names)
    all_y_pred = np.concatenate(all_y_pred)
    
    percentage_go_annotations = sum(all_y_test)/len(all_y_test)
    logits_apriori=np.log(percentage_go_annotations/(1-percentage_go_annotations))
    logits_apost= np.log(all_y_pred_proba/(1-all_y_pred_proba))
    delta_logits = logits_apost-logits_apriori

    platt_matrix.loc[goterm,all_y_names] = all_y_pred_proba
    distance_matrix.loc[goterm,all_y_names] = all_y_pred_proba_dis
    slim_matrix_single_neuron.loc[goterm,all_y_names] = all_y_test
    preds_svm_matrix.loc[goterm,all_y_names] = all_y_pred
    
    delta_logits_matrix.loc[goterm,all_y_names] = delta_logits

    GO_terms_auc_delta_logits[goterm] = metrics.roc_auc_score(all_y_test, delta_logits)
    GO_terms_auc_svm[goterm] = metrics.roc_auc_score(all_y_test, all_y_pred_proba)

    precision, recall, thresholds = metrics.precision_recall_curve(all_y_test, all_y_pred_proba)
    GO_terms_aupr_svm[goterm] = metrics.auc(recall, precision)
    GO_terms_precision_svm[goterm] = metrics.precision_score(all_y_test, all_y_pred)
Code
# AUC per GO term, computed from the Platt probabilities; NaN results are dropped
GO_terms_auc_svm_df = (
    pd.DataFrame(list(GO_terms_auc_svm.items()), columns=['goterm', 'auc'])
    .set_index("goterm")
    .dropna()
)
# Show from best to worst AUC
GO_terms_auc_svm_df.sort_values(by="auc", ascending=False)
auc
goterm
GO:0036289 0.999708
GO:0060440 0.994743
GO:0042149 0.971292
GO:1902455 0.969545
GO:0001556 0.965979
GO:0045636 0.955115
GO:0010750 0.955000
GO:0060020 0.949434
GO:1902042 0.945804
GO:1902236 0.941667
GO:0070059 0.936432
GO:0051453 0.935521
GO:0042659 0.931364
GO:0006360 0.930046
GO:0006959 0.921730
GO:0051607 0.920817
GO:1903800 0.920626
GO:0090201 0.912623
GO:0031047 0.911775
GO:2001021 0.910002
GO:1901029 0.910000
GO:0071353 0.909591
GO:0010832 0.909091
GO:0032469 0.903519
GO:0021695 0.901667
GO:0007059 0.892878
GO:0030890 0.892045
GO:0046898 0.888810
GO:0060632 0.886818
GO:0034983 0.883182
GO:0007617 0.883182
GO:0000423 0.882273
GO:0032147 0.881504
GO:1905278 0.881347
GO:0071670 0.879315
GO:0002326 0.879244
GO:1905710 0.876905
GO:0046666 0.875142
GO:1900118 0.873810
GO:0007263 0.873636
GO:0071364 0.872248
GO:0035195 0.869873
GO:0016573 0.868132
GO:0001662 0.866835
GO:2000134 0.865329
GO:0010559 0.862662
GO:0010575 0.860812
GO:0031640 0.860238
GO:0046902 0.858738
GO:0010971 0.858182
GO:0071480 0.855476
GO:0050864 0.854491
GO:0070842 0.853636
GO:0072384 0.853182
GO:2000739 0.852446
GO:1902459 0.852273
GO:0042177 0.852130
GO:0000086 0.851667
GO:0043552 0.850086
GO:0007528 0.849699
GO:0048008 0.844833
GO:0051384 0.844332
GO:0140013 0.842435
GO:0006396 0.842431
GO:0010039 0.842031
GO:0001501 0.840636
GO:0043922 0.839545
GO:0001658 0.837219
GO:0048170 0.836740
GO:0035733 0.836509
GO:2000379 0.836190
GO:0045737 0.835954
GO:0008637 0.835558
GO:0042771 0.835384
GO:0035025 0.833857
GO:0048536 0.833591
GO:0035584 0.831190
GO:0030513 0.830881
GO:0090037 0.830315
GO:0060766 0.830000
GO:1990403 0.830000
GO:0035162 0.829268
GO:0006261 0.828975
GO:0048538 0.826111
GO:0043162 0.825455
GO:1902166 0.825017
GO:0090141 0.824197
GO:0001782 0.822096
GO:1902533 0.820384
GO:0030593 0.820257
GO:0060312 0.820093
GO:0042472 0.819091
GO:0014068 0.817376
GO:0045727 0.816138
GO:0051926 0.814857
GO:0046326 0.814692
GO:0098780 0.814091
GO:0070932 0.813182
GO:0035855 0.812759
GO:0007030 0.812298
GO:0061734 0.812273
GO:0001569 0.811348
GO:0031648 0.810748
GO:0042733 0.810455
GO:0006839 0.807167
GO:0033028 0.807100
GO:0008625 0.807000
GO:0042552 0.806818
GO:0045987 0.806685
GO:0048743 0.805650
GO:1904950 0.803419
GO:0046329 0.802727
GO:0070584 0.802619
GO:2001243 0.802220
GO:0070528 0.802195
GO:0016575 0.801843
GO:0002931 0.801321
GO:0031069 0.801131
GO:0008045 0.799917
GO:0035790 0.798930
GO:0006310 0.798815
GO:0035788 0.798398
GO:0006576 0.797944
GO:0071900 0.797738
GO:0000209 0.797569
GO:0060789 0.797273
GO:0048701 0.796899
GO:0051301 0.795132
GO:0006367 0.794643
GO:0008210 0.794343
GO:0014827 0.792599
GO:0001824 0.792431
GO:0060325 0.792431
GO:2001234 0.792173
GO:0018205 0.791850
GO:0000165 0.791809
GO:0045668 0.791352
GO:0002437 0.790698
GO:0043407 0.790675
GO:1905065 0.789285
GO:1900020 0.786818
GO:0043029 0.786732
GO:1990384 0.786426
GO:0001666 0.786045
GO:0038083 0.785139
GO:1901224 0.785000
GO:2001240 0.784587
GO:0035909 0.784038
GO:0060348 0.783839
GO:2000010 0.783765
GO:0048812 0.783570
GO:0060271 0.782492
GO:0031532 0.782467
GO:0021575 0.781469
GO:0071277 0.780423
GO:0071456 0.780420
GO:0007266 0.780167
GO:1903010 0.779746
GO:0051770 0.779328
GO:0032386 0.778379
GO:0007286 0.777686
GO:0051902 0.776584
GO:0035234 0.776581
GO:0051973 0.775994
GO:0030193 0.775993
GO:0006937 0.774061
GO:0006612 0.773819
GO:0070373 0.773756
GO:0030878 0.773333
GO:0071839 0.773261
GO:1903146 0.772727
GO:0009791 0.772527
GO:0002718 0.772512
GO:0006303 0.770833
GO:1904019 0.770060
GO:0090398 0.768549
GO:0006996 0.767923
GO:0018108 0.767679
GO:0045444 0.767619
GO:0051225 0.766780
GO:0031099 0.766709
GO:0042060 0.766340
GO:0030336 0.765324
GO:0014823 0.764622
GO:0048010 0.764467
GO:0010038 0.763667
GO:0048839 0.763532
GO:0007098 0.763143
GO:0007202 0.762897
GO:0070374 0.762346
GO:0002318 0.761905
GO:0010718 0.761098
GO:0000422 0.760897
GO:0046777 0.760636
GO:0034976 0.760360
GO:0071320 0.760181
GO:0072284 0.758906
GO:0051150 0.758697
GO:0072210 0.757824
GO:0001823 0.757802
GO:0001934 0.757567
GO:2000811 0.757446
GO:0072073 0.757016
GO:1905897 0.756979
GO:0006260 0.756188
GO:0030316 0.756145
GO:0060612 0.756115
GO:0033327 0.755659
GO:0043406 0.755481
GO:0035924 0.754939
GO:0034097 0.754808
GO:0030325 0.754299
GO:0007026 0.753636
GO:0030308 0.753278
GO:0038033 0.753247
GO:0043129 0.752058
GO:0071276 0.751861
GO:0050792 0.751758
GO:0043586 0.750958
GO:1903077 0.750714
GO:0021549 0.749863
GO:0034502 0.749669
GO:0030318 0.749475
GO:0006975 0.748617
GO:1901990 0.747843
GO:0006270 0.747613
GO:0070588 0.747355
GO:0002053 0.747159
GO:0051056 0.747105
GO:0002366 0.746692
GO:0030324 0.746275
GO:0001701 0.746078
GO:0072659 0.745865
GO:0051354 0.745455
GO:0016239 0.745323
GO:0016032 0.744154
GO:0035094 0.743843
GO:1901796 0.743803
GO:1901987 0.743725
GO:0051147 0.742794
GO:0050769 0.741231
GO:0044770 0.740140
GO:0045747 0.739761
GO:0030307 0.738934
GO:1900006 0.738896
GO:0002327 0.738855
GO:0031274 0.738194
GO:0050921 0.737977
GO:0060562 0.737526
GO:0002062 0.737273
GO:0035994 0.736442
GO:0048286 0.734524
GO:0007259 0.734203
GO:0007186 0.733191
GO:0043408 0.733015
GO:0006368 0.732798
GO:0022612 0.732716
GO:0002009 0.732495
GO:0000082 0.732331
GO:2001236 0.732323
GO:0007585 0.732118
GO:0036120 0.732047
GO:0031507 0.732041
GO:0045595 0.731846
GO:1901988 0.731842
GO:0051051 0.731556
GO:0090630 0.731009
GO:0045860 0.730882
GO:0046427 0.730341
GO:0042475 0.729571
GO:0043627 0.729551
GO:0045930 0.729357
GO:0001952 0.728961
GO:0006605 0.728584
GO:0000902 0.728556
GO:0072593 0.728121
GO:0030216 0.727941
GO:0031667 0.727679
GO:0090184 0.727501
GO:0038007 0.727501
GO:0051899 0.727139
GO:0097193 0.727005
GO:0007010 0.726465
GO:0030509 0.726433
GO:0045088 0.726190
GO:0032740 0.725993
GO:0055118 0.725993
GO:0034220 0.725642
GO:0030010 0.725581
GO:0007169 0.725512
GO:2001257 0.725455
GO:0071363 0.725102
GO:0007049 0.724828
GO:0014911 0.724739
GO:0033993 0.724467
GO:0006275 0.723952
GO:0035767 0.723773
GO:0010564 0.722387
GO:0007018 0.722171
GO:0032880 0.722120
GO:0008544 0.722095
GO:0001843 0.721713
GO:0010212 0.721697
GO:0045580 0.721626
GO:0030198 0.721591
GO:0016055 0.721321
GO:0010977 0.720930
GO:0007519 0.720733
GO:0007565 0.720376
GO:0046883 0.720310
GO:0071392 0.720147
GO:0060437 0.719691
GO:0006807 0.719387
GO:0046632 0.718754
GO:0032956 0.718570
GO:0071897 0.718438
GO:0032967 0.717999
GO:0048041 0.717262
GO:0030163 0.717042
GO:0050853 0.716887
GO:0010638 0.716638
GO:0016601 0.716440
GO:0001656 0.716092
GO:0048477 0.716053
GO:0021782 0.715455
GO:0051054 0.715000
GO:0032940 0.714799
GO:0016925 0.714545
GO:0042531 0.714539
GO:0043549 0.713862
GO:0010507 0.713774
GO:0040018 0.713171
GO:0038084 0.712982
GO:0050900 0.712940
GO:0048873 0.712279
GO:0090263 0.712002
GO:0072655 0.711364
GO:0043966 0.711157
GO:0010467 0.711069
GO:2000773 0.711008
GO:0016572 0.710407
GO:0033138 0.710145
GO:0046578 0.709704
GO:0036324 0.709154
GO:0046631 0.708211
GO:0051321 0.707683
GO:0032008 0.707223
GO:0046651 0.707067
GO:0007346 0.707006
GO:0061029 0.706909
GO:0071230 0.706594
GO:0002720 0.705912
GO:0010629 0.705797
GO:0034765 0.705101
GO:0035726 0.704893
GO:0010613 0.704696
GO:0045321 0.704539
GO:0031929 0.704054
GO:0043270 0.703842
GO:0019222 0.703719
GO:0006939 0.703424
GO:0046628 0.703369
GO:0061024 0.703188
GO:0060391 0.702866
GO:0051261 0.702830
GO:0006811 0.701553
GO:0008360 0.701514
GO:0060384 0.701453
GO:0055003 0.700996
GO:0007268 0.700781
GO:0010628 0.699521
GO:0006139 0.699478
GO:0006468 0.699029
GO:0060179 0.698636
GO:0048568 0.698434
GO:0043244 0.697851
GO:0051894 0.697683
GO:0006469 0.697613
GO:0034329 0.697061
GO:0007204 0.696417
GO:1903829 0.696396
GO:0072006 0.696198
GO:0032355 0.695494
GO:0048266 0.695194
GO:0008354 0.694969
GO:0045785 0.694047
GO:0009056 0.693798
GO:0048598 0.693287
GO:0034644 0.692308
GO:0048469 0.692299
GO:0051223 0.692235
GO:0065003 0.692223
GO:0000278 0.692038
GO:0031103 0.691333
GO:0007267 0.691304
GO:0048017 0.690794
GO:0055082 0.690784
GO:0051901 0.690122
GO:0051258 0.690099
GO:0001541 0.689675
GO:0006897 0.689347
GO:0001932 0.688849
GO:0070507 0.688415
GO:0050865 0.687711
GO:0007155 0.687639
GO:0060326 0.687327
GO:0048608 0.687212
GO:0035019 0.686954
GO:0009966 0.686876
GO:0050728 0.686281
GO:1903053 0.686275
GO:0050866 0.686189
GO:0051881 0.686055
GO:0050872 0.685780
GO:0008286 0.683527
GO:0007015 0.683431
GO:0045840 0.683252
GO:1902074 0.683239
GO:0035306 0.682977
GO:0099111 0.682737
GO:0019221 0.681500
GO:0035264 0.681434
GO:0010595 0.681090
GO:0006936 0.680440
GO:0051496 0.679907
GO:0002764 0.679018
GO:0010811 0.677971
GO:0035022 0.677728
GO:0008630 0.677216
GO:0099173 0.676905
GO:0051897 0.676314
GO:0061045 0.676257
GO:0030001 0.676160
GO:0051281 0.675909
GO:0009888 0.675500
GO:0016192 0.675421
GO:0009582 0.675325
GO:0010821 0.675098
GO:0016358 0.675073
GO:2000757 0.675000
GO:0006687 0.674981
GO:0048863 0.674715
GO:0001946 0.674123
GO:0010952 0.673571
GO:0010727 0.672727
GO:0009653 0.672628
GO:0030097 0.672283
GO:0001837 0.672128
GO:0060341 0.671960
GO:0048741 0.671918
GO:0032410 0.671818
GO:0051726 0.671467
GO:0033554 0.670452
GO:0070662 0.670073
GO:0050918 0.670060
GO:0032835 0.669444
GO:0050678 0.668997
GO:0001553 0.668960
GO:0018105 0.668010
GO:0050795 0.667848
GO:0001817 0.667273
GO:0046889 0.666977
GO:0007389 0.666190
GO:0009887 0.666023
GO:0030154 0.665761
GO:0033077 0.665433
GO:0007420 0.665270
GO:0019216 0.664614
GO:0031032 0.664097
GO:0009058 0.664086
GO:0051898 0.663949
GO:0090090 0.663876
GO:0043123 0.663876
GO:0019827 0.663747
GO:0070663 0.663664
GO:0003300 0.663430
GO:0051049 0.663173
GO:0002573 0.663121
GO:0006644 0.663072
GO:1904707 0.661818
GO:0003338 0.661761
GO:0071478 0.661745
GO:0051128 0.661709
GO:0006325 0.661626
GO:0042063 0.661539
GO:0016070 0.661175
GO:0120035 0.661093
GO:0001975 0.659722
GO:0030539 0.659695
GO:0044255 0.658997
GO:0071310 0.658915
GO:0001570 0.658593
GO:0030182 0.658548
GO:0048146 0.658343
GO:0007005 0.658279
GO:0006096 0.658105
GO:1902532 0.657998
GO:0030855 0.657575
GO:0008584 0.657556
GO:2000352 0.657355
GO:0007435 0.657077
GO:0001779 0.656818
GO:0031109 0.656514
GO:0010592 0.655909
GO:0009743 0.655745
GO:0007416 0.655581
GO:0009725 0.655496
GO:0033619 0.655455
GO:0051649 0.655238
GO:0048589 0.655086
GO:0050870 0.654628
GO:0016579 0.654628
GO:0000122 0.654533
GO:0042110 0.654528
GO:0051092 0.654527
GO:0006606 0.654343
GO:0035265 0.653738
GO:1903078 0.653463
GO:0051403 0.653410
GO:0048167 0.653211
GO:0072239 0.653040
GO:0051641 0.652254
GO:0001755 0.652116
GO:0071887 0.651442
GO:1902036 0.651376
GO:0010632 0.651373
GO:0046330 0.651285
GO:0040008 0.650683
GO:0008610 0.650369
GO:0009266 0.650026
GO:0045944 0.649968
GO:0032516 0.648208
GO:0071333 0.648163
GO:0043536 0.647631
GO:0051924 0.647554
GO:1901135 0.647328
GO:0060571 0.647196
GO:0043524 0.646824
GO:0007423 0.646708
GO:0043473 0.646277
GO:0006470 0.646247
GO:0050910 0.646230
GO:1901031 0.646116
GO:0007275 0.645970
GO:0030335 0.645099
GO:0050808 0.645046
GO:0006915 0.644861
GO:0022414 0.644656
GO:0031663 0.644545
GO:0030521 0.643731
GO:0007219 0.642491
GO:0009968 0.641423
GO:0035556 0.641296
GO:0098609 0.640468
GO:0007399 0.639791
GO:0048714 0.639690
GO:0002244 0.639418
GO:0002320 0.639126
GO:0044281 0.639080
GO:0007584 0.639050
GO:0034766 0.639001
GO:0051247 0.638756
GO:1904062 0.638507
GO:2000278 0.637619
GO:0051174 0.637419
GO:0045165 0.637385
GO:0042593 0.636878
GO:0007166 0.636669
GO:0007265 0.636452
GO:0051209 0.635222
GO:0002548 0.634638
GO:0010508 0.634557
GO:0050821 0.634304
GO:0034605 0.634286
GO:0045740 0.632009
GO:0008284 0.631881
GO:0045055 0.631843
GO:0002443 0.631592
GO:0016477 0.631051
GO:0006355 0.630857
GO:0048565 0.630000
GO:0002274 0.629372
GO:0071549 0.627954
GO:0042325 0.627694
GO:0097021 0.627451
GO:0051050 0.627173
GO:0003376 0.627083
GO:0006954 0.626960
GO:0050673 0.626506
GO:1904646 0.626432
GO:0048149 0.625382
GO:0045596 0.624389
GO:0006357 0.624002
GO:0050776 0.623673
GO:0001822 0.622805
GO:0071222 0.622348
GO:0070933 0.621818
GO:0046486 0.621699
GO:0043542 0.621690
GO:0048878 0.621353
GO:1902275 0.621331
GO:1901300 0.620545
GO:0031016 0.620271
GO:0000302 0.619605
GO:0001763 0.619510
GO:0043161 0.619160
GO:1900272 0.618401
GO:0048011 0.618401
GO:0042752 0.618397
GO:0006338 0.618121
GO:0010033 0.617717
GO:0033690 0.616364
GO:0000226 0.616277
GO:0035304 0.616266
GO:0051145 0.615603
GO:2000251 0.615194
GO:0071300 0.615024
GO:0070997 0.614605
GO:0033157 0.614122
GO:0009410 0.613962
GO:0008277 0.613914
GO:0070527 0.612601
GO:0019752 0.612179
GO:0006909 0.612093
GO:0030072 0.611897
GO:0021795 0.611157
GO:0008064 0.610543
GO:0010921 0.610538
GO:1905564 0.609854
GO:0097191 0.608829
GO:0006412 0.608810
GO:0030705 0.608111
GO:0060976 0.607966
GO:0002682 0.607900
GO:0007517 0.607632
GO:0032869 0.606959
GO:0006886 0.606403
GO:2001241 0.606335
GO:0060374 0.605392
GO:0007612 0.604907
GO:0010941 0.604825
GO:0030183 0.604706
GO:0014070 0.604053
GO:1903578 0.603642
GO:0002862 0.602844
GO:0051098 0.602127
GO:0006511 0.601368
GO:0051341 0.601133
GO:0042551 0.601132
GO:0098586 0.600000
GO:0045637 0.599805
GO:0002250 0.599773
GO:0006898 0.598760
GO:0034767 0.598730
GO:0071417 0.598465
GO:0006810 0.597926
GO:0051171 0.597884
GO:0006629 0.597410
GO:0045766 0.596975
GO:0050890 0.596859
GO:0043065 0.596390
GO:0043066 0.595845
GO:0055085 0.595677
GO:0002376 0.595288
GO:0034599 0.594821
GO:0007283 0.594381
GO:0051353 0.593750
GO:1900087 0.593301
GO:0043467 0.592435
GO:0051146 0.592403
GO:0060840 0.592220
GO:0021953 0.591719
GO:0043534 0.591560
GO:0060416 0.590920
GO:0033365 0.590514
GO:0030217 0.590422
GO:0042391 0.590414
GO:0030168 0.589207
GO:2001020 0.587943
GO:0031175 0.586994
GO:0043170 0.586341
GO:0046034 0.585555
GO:0060997 0.585305
GO:2001214 0.585183
GO:0001818 0.585168
GO:0007411 0.584390
GO:0001942 0.583333
GO:0001819 0.582512
GO:0032148 0.582440
GO:0048638 0.581889
GO:0050804 0.581178
GO:0051702 0.580092
GO:0048468 0.579902
GO:0032922 0.579511
GO:0060644 0.578683
GO:0003158 0.578678
GO:0033141 0.578180
GO:0006914 0.577035
GO:0045821 0.576588
GO:0048557 0.576058
GO:0032388 0.575792
GO:0060485 0.574925
GO:0034446 0.574457
GO:0008104 0.573826
GO:0019318 0.572492
GO:0043547 0.571554
GO:0035249 0.570909
GO:0045732 0.570720
GO:0051046 0.570516
GO:0009416 0.570369
GO:0040016 0.568947
GO:0007596 0.568801
GO:0030900 0.568727
GO:0051494 0.568682
GO:2000377 0.567752
GO:0050731 0.564365
GO:0030218 0.563725
GO:0032024 0.563600
GO:0043114 0.563304
GO:0031333 0.562814
GO:0042180 0.558869
GO:0030162 0.558832
GO:0033044 0.558828
GO:0007507 0.558717
GO:0042307 0.557237
GO:0046718 0.557236
GO:0000723 0.556190
GO:0050770 0.555122
GO:0033043 0.554784
GO:0008016 0.554545
GO:0034394 0.553704
GO:0006281 0.553333
GO:0032467 0.552727
GO:0051047 0.552411
GO:0048661 0.551442
GO:0018107 0.550802
GO:0023019 0.550078
GO:0009165 0.550021
GO:0043154 0.549805
GO:0050920 0.549434
GO:0002685 0.546958
GO:0120162 0.546258
GO:0035860 0.546003
GO:0033689 0.545455
GO:0097067 0.545429
GO:0045597 0.544127
GO:0043303 0.542609
GO:0048103 0.542392
GO:0006997 0.537152
GO:0016570 0.536582
GO:0007610 0.536043
GO:0006508 0.532807
GO:0009259 0.531674
GO:0046890 0.531163
GO:0048511 0.530805
GO:0050729 0.529915
GO:0002821 0.529412
GO:0008285 0.529191
GO:0030522 0.529167
GO:0000724 0.528909
GO:0060749 0.527903
GO:0030041 0.527316
GO:0001525 0.527054
GO:0046474 0.525346
GO:0090314 0.524961
GO:0009306 0.524257
GO:0009617 0.522448
GO:0050790 0.522120
GO:0071407 0.520746
GO:0045833 0.519718
GO:0001938 0.519459
GO:0007156 0.518641
GO:0045793 0.517558
GO:0010976 0.517231
GO:0005975 0.516369
GO:0007269 0.515212
GO:0043392 0.513571
GO:0038096 0.512566
GO:0001764 0.511669
GO:0016241 0.511429
GO:0010243 0.511047
GO:0007173 0.510875
GO:0048704 0.510703
GO:0016567 0.509046
GO:0036092 0.507680
GO:0034504 0.506947
GO:0032092 0.505356
GO:0005984 0.504359
GO:0007568 0.504167
GO:0023061 0.503536
GO:0090050 0.503297
GO:1904659 0.502831
GO:0090042 0.502698
GO:0032729 0.501241
GO:0051928 0.499339
GO:0060627 0.498530
GO:0002064 0.498000
GO:0046488 0.497770
GO:0030032 0.495934
GO:0042113 0.495851
GO:0045739 0.494860
GO:0033627 0.494545
GO:0032409 0.493643
GO:0043086 0.492164
GO:0008542 0.490291
GO:0002684 0.489269
GO:0007422 0.487158
GO:1902904 0.487132
GO:0008202 0.485860
GO:0001649 0.485847
GO:0017157 0.485501
GO:1905563 0.484813
GO:0035051 0.484392
GO:0007160 0.484360
GO:0022407 0.483493
GO:0045087 0.482222
GO:1902903 0.481612
GO:0031398 0.479421
GO:0030838 0.479091
GO:0051497 0.479070
GO:0060048 0.478199
GO:0045471 0.476308
GO:0042220 0.475867
GO:0051640 0.475636
GO:0060173 0.475105
GO:0001889 0.475105
GO:0007165 0.474850
GO:0033002 0.474593
GO:0050863 0.472539
GO:0071383 0.471842
GO:0019233 0.470455
GO:0006417 0.468983
GO:0036473 0.467985
GO:0042093 0.467572
GO:0031529 0.467572
GO:0035754 0.467572
GO:0007159 0.467399
GO:0003014 0.467106
GO:0060079 0.466856
GO:0046677 0.463263
GO:0043254 0.462665
GO:0007179 0.462000
GO:0006457 0.461538
GO:0060395 0.461364
GO:0061351 0.461187
GO:2000270 0.461036
GO:0050852 0.459040
GO:0048738 0.459003
GO:0002683 0.458222
GO:2000300 0.457866
GO:0048709 0.456667
GO:0015031 0.455872
GO:0002819 0.455158
GO:1900407 0.453919
GO:0090068 0.453882
GO:1900180 0.453814
GO:0002218 0.453729
GO:0050680 0.453056
GO:0045429 0.451983
GO:0016071 0.451190
GO:0032743 0.448319
GO:0060560 0.448152
GO:0050778 0.447781
GO:0048484 0.447727
GO:0016236 0.447288
GO:0043525 0.445941
GO:0032091 0.445413
GO:0060291 0.445386
GO:0006163 0.444193
GO:0006694 0.442936
GO:0051000 0.442791
GO:0009755 0.441333
GO:0006753 0.441130
GO:0070301 0.439854
GO:0002040 0.439390
GO:0050896 0.439380
GO:0032760 0.438914
GO:0007626 0.438411
GO:0021987 0.437111
GO:0048675 0.436696
GO:0043434 0.435676
GO:0006401 0.435035
GO:0034405 0.432857
GO:0045907 0.431957
GO:0043124 0.430166
GO:0006352 0.430046
GO:0051017 0.428718
GO:0090280 0.428611
GO:0015980 0.428571
GO:0042310 0.428571
GO:0060045 0.426694
GO:0031397 0.424357
GO:0045347 0.424091
GO:0042632 0.423328
GO:0031056 0.421552
GO:0010951 0.420635
GO:0051249 0.420140
GO:0051090 0.418732
GO:0044262 0.415971
GO:0032879 0.414881
GO:0007162 0.414634
GO:0021766 0.412587
GO:0030282 0.407743
GO:0099504 0.406512
GO:0003007 0.401092
GO:0033143 0.396899
GO:0007158 0.395676
GO:0046942 0.395260
GO:0007229 0.394841
GO:0031334 0.393614
GO:0097009 0.391034
GO:0060021 0.388435
GO:0060041 0.386626
GO:0097190 0.384703
GO:0030048 0.384615
GO:0006298 0.384370
GO:0016042 0.384148
GO:0030101 0.382547
GO:0034103 0.381289
GO:0070102 0.380000
GO:0033628 0.377171
GO:0001894 0.372563
GO:0030512 0.371145
GO:0007498 0.367991
GO:0008361 0.367423
GO:0046620 0.365476
GO:0031295 0.364518
GO:0001892 0.355979
GO:0055119 0.353947
GO:0032436 0.353421
GO:0006919 0.352725
GO:0060444 0.347273
GO:0060740 0.331825
GO:0046854 0.326276
GO:0019722 0.322272
GO:0016485 0.318636
GO:0051302 0.314545
GO:0006869 0.299648
GO:0000077 0.284021
Code
# Report how many GO-term SVM models have an AUC entry.
print(f"There are {len(GO_terms_auc_svm_df)} svm models.")
There are 939 svm models.
Code
# Restrict every per-GO-term matrix to the rows of the GO terms that have a model.
# The row labels are collected once and reused for all five selections.
_go_terms_with_model = list(GO_terms_auc_svm_df.index)
platt_matrix = platt_matrix.loc[_go_terms_with_model, :]
distance_matrix = distance_matrix.loc[_go_terms_with_model, :]
slim_matrix_single_neuron = slim_matrix_single_neuron.loc[_go_terms_with_model, :]
preds_svm_matrix = preds_svm_matrix.loc[_go_terms_with_model, :]
delta_logits_matrix = delta_logits_matrix.loc[_go_terms_with_model, :]

AUC histogram

Code
# Histogram of the AUC of every GO-term SVM model. Bins whose left edge lies
# above 0.69 are highlighted and the share of models above 0.69 is annotated.
sns.set(rc={'figure.figsize':(10,6)})
fig, ax = plt.subplots()

# Percentage of models with AUC > 0.69, formatted as e.g. "41.21%".
perc = str(round((100*len(GO_terms_auc_svm_df[GO_terms_auc_svm_df["auc"]>0.69])/len(GO_terms_auc_svm_df)),2))+"%"

# Plot only the "auc" column: the frame later gains "levels" and "parents"
# columns, and passing the whole frame would then draw one histogram per
# column and break the per-bin recoloring below when the cell is re-run.
N, bins, patches = plt.hist(GO_terms_auc_svm_df["auc"], color=CB_color_cycle[6],bins=50, linewidth=0.1)

# bins holds one more edge than there are bars; zip stops at the last bar,
# pairing each bar with its left edge.
for left_edge, patch in zip(bins, patches):
    if left_edge>0.69:
        patch.set_facecolor(CB_color_cycle[2])

plt.yticks(fontsize=16)
plt.xticks(fontsize=16)

# Hide the top, right and left spines; keep the bottom one in light gray.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_color('#DDDDDD')

# Remove the x-axis tick marks but keep the ones on the y axis.
ax.tick_params(bottom=False, left=True)

# Add a light-gray horizontal grid drawn behind the bars; no vertical grid.
ax.set_axisbelow(True)
ax.yaxis.grid(True, color='#EEEEEE')
ax.xaxis.grid(False)

plt.xlabel("AUC value", fontsize=20)
plt.ylabel("Number of GO term models", fontsize=20)

# Manual legend with a single colored patch for the highlighted bins.
# NOTE(review): the label says ">=0.7" while the code thresholds at >0.69 — confirm this is intended.
colors2 = {'GO term models with AUC>=0.7':CB_color_cycle[2]}
labels = list(colors2.keys())
handles = [plt.Rectangle((0,0),1,1, color=colors2[label]) for label in labels]
plt.legend(handles, labels,fontsize=20, loc="lower left", bbox_to_anchor=(0.35,-0.35))

# perc is already a string, so it is passed to the annotation directly.
plt.text(0.71, 8, perc, fontsize=20,color='#333333')
plt.title("Overall performance of the models using expression", fontsize=24)
# Author's note: what works best is the plain sum of the attribution.
fig.tight_layout()
fig.savefig(resultsdir+'modelsAUCsvm.png', transparent=True)

AUC waterfall plot

Code
# Order the models from highest to lowest AUC.
GO_terms_auc_svm_df = GO_terms_auc_svm_df.sort_values(by="auc", ascending=False)
Code
# Waterfall plot: one bar per SVM model, in the current row order of
# GO_terms_auc_svm_df, with the bar edge colored by whether the AUC is below 0.69.
plt.rcParams['figure.figsize'] = (12, 9)
drugs = GO_terms_auc_svm_df.index
rhos = GO_terms_auc_svm_df["auc"]

# Share of models (in %) with AUC > 0.69, rounded to one decimal.
percentage = round(sum(rhos > 0.69) / len(rhos) * 100, 1)

fig, ax = plt.subplots()
# Gray edge below 0.69, blue edge otherwise.
colors = ['#C9C9C9' if auc < 0.69 else "#6492CA" for auc in rhos]
ax.bar(drugs, rhos, edgecolor=colors, linewidth=2)

# No x tick labels (one per model would be unreadable); large y tick labels.
plt.xticks([])
plt.yticks(fontsize=28)

# Hide all four spines (figure borders).
for side in ('top', 'right', 'left', 'bottom'):
    ax.spines[side].set_visible(False)

# Remove the tick marks on both axes.
ax.tick_params(bottom=False, left=False)

# No grid lines on either axis.
ax.set_axisbelow(False)
for grid_axis in (ax.yaxis, ax.xaxis):
    grid_axis.grid(False)

# Axis labels; the negative labelpad pulls the x label up towards the bars.
ax.set_xlabel('SVM models', labelpad=-30, color='#333333', fontsize=50)
ax.set_ylabel('AUC-ROC value', labelpad=15, color='#333333', fontsize=50)
ax.set_title('', color='#333333', weight='bold')

# Legend pieces are still built, but the plt.legend(...) call is disabled,
# so no legend is drawn on this figure.
colors2 = {'High confidence drugs (r>0.5)':'#A4C61A'}
labels = list(colors2)
handles = [plt.Rectangle((0,0),1,1, color=shade) for shade in colors2.values()]

# Print the percentage computed above inside the plot area.
plt.text(77, 0.32, str(percentage)+"%", fontsize=60, color='#000000')

plt.ylim(-0.1, 1.1)
# Make the chart fill out the figure better.
fig.tight_layout()
fig.savefig(resultsdir+'WaterfallModelsSVM.png', transparent=True)

AUC boxplot by GO hierarchy level (with number of parents per term)

Code
# Add, for every GO term with an SVM model, its hierarchy level and its
# number of parents as extra columns next to the AUC.
number_parents = {}
levels = {}
for term in GO_terms_auc_svm_df.index:
    # One parent per incoming edge of the term in dG.
    number_parents[term] = len([source for source, _ in dG.in_edges(term)])
    # The stored level number is shifted down by one.
    levels[term] = level_number[term] - 1
levels = pd.DataFrame.from_dict(levels, orient='index')
number_parents = pd.DataFrame.from_dict(number_parents, orient='index')

# Concatenate onto the "auc" column only. Re-running this cell would otherwise
# stack a second pair of columns onto the already annotated frame and make the
# three-name column assignment below fail with a length mismatch.
GO_terms_auc_svm_df = pd.concat([GO_terms_auc_svm_df[["auc"]], levels, number_parents], axis=1)
GO_terms_auc_svm_df.columns = ["auc","levels","parents"]
Code
# Display the table with the "auc", "levels" and "parents" columns.
GO_terms_auc_svm_df
auc levels parents
GO:0036289 0.999708 0 2
GO:0060440 0.994743 0 4
GO:0042149 0.971292 0 1
GO:1902455 0.969545 0 2
GO:0001556 0.965979 0 6
GO:0045636 0.955115 0 6
GO:0010750 0.955000 0 4
GO:0060020 0.949434 0 1
GO:1902042 0.945804 0 4
GO:1902236 0.941667 0 12
GO:0070059 0.936432 1 2
GO:0051453 0.935521 1 2
GO:0042659 0.931364 0 3
GO:0006360 0.930046 2 7
GO:0006959 0.921730 2 2
GO:0051607 0.920817 2 2
GO:1903800 0.920626 0 13
GO:0090201 0.912623 0 9
GO:0031047 0.911775 2 1
GO:2001021 0.910002 1 4
GO:1901029 0.910000 0 12
GO:0071353 0.909591 1 2
GO:0010832 0.909091 0 5
GO:0032469 0.903519 1 4
GO:0021695 0.901667 1 2
GO:0007059 0.892878 3 1
GO:0030890 0.892045 0 11
GO:0046898 0.888810 0 4
GO:0060632 0.886818 1 3
GO:0034983 0.883182 0 5
GO:0007617 0.883182 1 3
GO:0000423 0.882273 1 2
GO:0032147 0.881504 1 1
GO:1905278 0.881347 0 5
GO:0071670 0.879315 0 2
GO:0002326 0.879244 0 2
GO:1905710 0.876905 1 1
GO:0046666 0.875142 0 5
GO:1900118 0.873810 0 3
GO:0007263 0.873636 1 1
GO:0071364 0.872248 0 3
GO:0035195 0.869873 1 5
GO:0016573 0.868132 3 6
GO:0001662 0.866835 1 3
GO:2000134 0.865329 1 9
GO:0010559 0.862662 1 7
GO:0010575 0.860812 0 4
GO:0031640 0.860238 1 1
GO:0046902 0.858738 1 4
GO:0010971 0.858182 0 10
GO:0071480 0.855476 0 2
GO:0050864 0.854491 2 2
GO:0070842 0.853636 0 1
GO:0072384 0.853182 2 4
GO:2000739 0.852446 0 2
GO:1902459 0.852273 0 2
GO:0042177 0.852130 1 7
GO:0000086 0.851667 1 4
GO:0043552 0.850086 0 8
GO:0007528 0.849699 1 1
GO:0048008 0.844833 1 1
GO:0051384 0.844332 1 3
GO:0140013 0.842435 2 5
GO:0006396 0.842431 4 2
GO:0010039 0.842031 1 1
GO:0001501 0.840636 4 2
GO:0043922 0.839545 0 2
GO:0001658 0.837219 1 12
GO:0048170 0.836740 0 2
GO:0035733 0.836509 0 1
GO:2000379 0.836190 1 4
GO:0045737 0.835954 0 8
GO:0008637 0.835558 1 2
GO:0042771 0.835384 1 3
GO:0035025 0.833857 0 6
GO:0048536 0.833591 0 3
GO:0035584 0.831190 0 1
GO:0030513 0.830881 0 7
GO:0090037 0.830315 0 4
GO:0060766 0.830000 0 6
GO:1990403 0.830000 0 1
GO:0035162 0.829268 1 2
GO:0006261 0.828975 2 1
GO:0048538 0.826111 0 3
GO:0043162 0.825455 1 1
GO:1902166 0.825017 0 14
GO:0090141 0.824197 0 6
GO:0001782 0.822096 0 2
GO:1902533 0.820384 2 5
GO:0030593 0.820257 1 2
GO:0060312 0.820093 0 4
GO:0042472 0.819091 1 5
GO:0014068 0.817376 0 4
GO:0045727 0.816138 1 12
GO:0051926 0.814857 1 5
GO:0046326 0.814692 0 5
GO:0098780 0.814091 1 1
GO:0070932 0.813182 0 1
GO:0035855 0.812759 0 5
GO:0007030 0.812298 1 2
GO:0061734 0.812273 0 3
GO:0001569 0.811348 0 7
GO:0031648 0.810748 0 1
GO:0042733 0.810455 0 5
GO:0006839 0.807167 2 3
GO:0033028 0.807100 1 1
GO:0008625 0.807000 1 1
GO:0042552 0.806818 2 2
GO:0045987 0.806685 1 4
GO:0048743 0.805650 0 10
GO:1904950 0.803419 2 3
GO:0046329 0.802727 1 9
GO:0070584 0.802619 0 3
GO:2001243 0.802220 2 9
GO:0070528 0.802195 1 1
GO:0016575 0.801843 2 5
GO:0002931 0.801321 0 1
GO:0031069 0.801131 0 5
GO:0008045 0.799917 1 1
GO:0035790 0.798930 0 1
GO:0006310 0.798815 3 3
GO:0035788 0.798398 0 3
GO:0006576 0.797944 2 2
GO:0071900 0.797738 2 2
GO:0000209 0.797569 2 1
GO:0060789 0.797273 0 3
GO:0048701 0.796899 1 2
GO:0051301 0.795132 2 1
GO:0006367 0.794643 2 8
GO:0008210 0.794343 1 3
GO:0014827 0.792599 0 1
GO:0001824 0.792431 1 2
GO:0060325 0.792431 0 2
GO:2001234 0.792173 3 7
GO:0018205 0.791850 4 4
GO:0000165 0.791809 3 1
GO:0045668 0.791352 0 3
GO:0002437 0.790698 2 3
GO:0043407 0.790675 1 5
GO:1905065 0.789285 0 7
GO:1900020 0.786818 0 2
GO:0043029 0.786732 1 2
GO:1990384 0.786426 0 4
GO:0001666 0.786045 2 1
GO:0038083 0.785139 0 2
GO:1901224 0.785000 0 3
GO:2001240 0.784587 0 5
GO:0035909 0.784038 1 4
GO:0060348 0.783839 3 2
GO:2000010 0.783765 0 5
GO:0048812 0.783570 3 4
GO:0060271 0.782492 3 2
GO:0031532 0.782467 1 2
GO:0021575 0.781469 1 3
GO:0071277 0.780423 0 3
GO:0071456 0.780420 1 4
GO:0007266 0.780167 1 1
GO:1903010 0.779746 0 3
GO:0051770 0.779328 0 4
GO:0032386 0.778379 2 5
GO:0007286 0.777686 1 5
GO:0051902 0.776584 0 4
GO:0035234 0.776581 0 3
GO:0051973 0.775994 0 8
GO:0030193 0.775993 2 4
GO:0006937 0.774061 2 2
GO:0006612 0.773819 1 4
GO:0070373 0.773756 0 3
GO:0030878 0.773333 0 2
GO:0071839 0.773261 0 1
GO:1903146 0.772727 1 6
GO:0009791 0.772527 1 2
GO:0002718 0.772512 2 8
GO:0006303 0.770833 1 1
GO:1904019 0.770060 1 1
GO:0090398 0.768549 1 2
GO:0006996 0.767923 5 1
GO:0018108 0.767679 3 5
GO:0045444 0.767619 2 1
GO:0051225 0.766780 2 4
GO:0031099 0.766709 2 1
GO:0042060 0.766340 4 1
GO:0030336 0.765324 2 3
GO:0014823 0.764622 1 1
GO:0048010 0.764467 1 1
GO:0010038 0.763667 2 1
GO:0048839 0.763532 2 2
GO:0007098 0.763143 2 3
GO:0007202 0.762897 0 2
GO:0070374 0.762346 0 3
GO:0002318 0.761905 0 1
GO:0010718 0.761098 1 4
GO:0000422 0.760897 2 4
GO:0046777 0.760636 1 1
GO:0034976 0.760360 3 1
GO:0071320 0.760181 0 7
GO:0072284 0.758906 0 6
GO:0051150 0.758697 1 2
GO:0072210 0.757824 1 2
GO:0001823 0.757802 2 1
GO:0001934 0.757567 3 11
GO:2000811 0.757446 0 4
GO:0072073 0.757016 2 2
GO:1905897 0.756979 2 4
GO:0006260 0.756188 3 3
GO:0030316 0.756145 2 1
GO:0060612 0.756115 1 2
GO:0033327 0.755659 0 4
GO:0043406 0.755481 1 5
GO:0035924 0.754939 2 1
GO:0034097 0.754808 3 1
GO:0030325 0.754299 0 2
GO:0007026 0.753636 0 9
GO:0030308 0.753278 1 3
GO:0038033 0.753247 0 9
GO:0043129 0.752058 0 2
GO:0071276 0.751861 0 3
GO:0050792 0.751758 2 2
GO:0043586 0.750958 1 1
GO:1903077 0.750714 1 6
GO:0021549 0.749863 2 2
GO:0034502 0.749669 2 1
GO:0030318 0.749475 1 2
GO:0006975 0.748617 0 2
GO:1901990 0.747843 2 6
GO:0006270 0.747613 1 4
GO:0070588 0.747355 4 3
GO:0002053 0.747159 0 2
GO:0051056 0.747105 3 2
GO:0002366 0.746692 3 4
GO:0030324 0.746275 2 2
GO:0001701 0.746078 2 1
GO:0072659 0.745865 2 3
GO:0051354 0.745455 1 2
GO:0016239 0.745323 1 3
GO:0016032 0.744154 3 1
GO:0035094 0.743843 1 1
GO:1901796 0.743803 1 2
GO:1901987 0.743725 3 2
GO:0051147 0.742794 2 3
GO:0050769 0.741231 2 7
GO:0044770 0.740140 4 2
GO:0045747 0.739761 0 5
GO:0030307 0.738934 2 3
GO:1900006 0.738896 0 5
GO:0002327 0.738855 0 1
GO:0031274 0.738194 0 3
GO:0050921 0.737977 1 3
GO:0060562 0.737526 2 4
GO:0002062 0.737273 2 2
GO:0035994 0.736442 1 1
GO:0048286 0.734524 1 2
GO:0007259 0.734203 2 1
GO:0007186 0.733191 3 1
GO:0043408 0.733015 2 3
GO:0006368 0.732798 1 7
GO:0022612 0.732716 2 2
GO:0002009 0.732495 3 2
GO:0000082 0.732331 2 4
GO:2001236 0.732323 2 6
GO:0007585 0.732118 1 1
GO:0036120 0.732047 0 3
GO:0031507 0.732041 1 6
GO:0045595 0.731846 4 2
GO:1901988 0.731842 2 6
GO:0051051 0.731556 3 3
GO:0090630 0.731009 0 1
GO:0045860 0.730882 2 8
GO:0046427 0.730341 1 6
GO:0042475 0.729571 2 1
GO:0043627 0.729551 1 1
GO:0045930 0.729357 2 5
GO:0001952 0.728961 1 3
GO:0006605 0.728584 2 1
GO:0000902 0.728556 4 1
GO:0072593 0.728121 3 1
GO:0030216 0.727941 2 3
GO:0031667 0.727679 4 1
GO:0090184 0.727501 0 2
GO:0038007 0.727501 0 1
GO:0051899 0.727139 2 1
GO:0097193 0.727005 3 2
GO:0007010 0.726465 4 1
GO:0030509 0.726433 1 4
GO:0045088 0.726190 2 4
GO:0032740 0.725993 0 4
GO:0055118 0.725993 0 6
GO:0034220 0.725642 5 2
GO:0030010 0.725581 1 1
GO:0007169 0.725512 3 1
GO:2001257 0.725455 2 6
GO:0071363 0.725102 3 2
GO:0007049 0.724828 6 1
GO:0014911 0.724739 1 4
GO:0033993 0.724467 3 1
GO:0006275 0.723952 2 6
GO:0035767 0.723773 1 2
GO:0010564 0.722387 4 3
GO:0007018 0.722171 4 1
GO:0032880 0.722120 4 2
GO:0008544 0.722095 3 1
GO:0001843 0.721713 1 8
GO:0010212 0.721697 1 1
GO:0045580 0.721626 2 10
GO:0030198 0.721591 2 1
GO:0016055 0.721321 2 2
GO:0010977 0.720930 1 4
GO:0007519 0.720733 2 2
GO:0007565 0.720376 2 2
GO:0046883 0.720310 3 5
GO:0071392 0.720147 0 6
GO:0060437 0.719691 0 2
GO:0006807 0.719387 7 1
GO:0046632 0.718754 2 2
GO:0032956 0.718570 3 3
GO:0071897 0.718438 2 5
GO:0032967 0.717999 0 3
GO:0048041 0.717262 1 3
GO:0030163 0.717042 4 4
GO:0050853 0.716887 1 4
GO:0010638 0.716638 2 4
GO:0016601 0.716440 2 1
GO:0001656 0.716092 2 1
GO:0048477 0.716053 1 3
GO:0021782 0.715455 2 3
GO:0051054 0.715000 2 6
GO:0032940 0.714799 5 2
GO:0016925 0.714545 1 5
GO:0042531 0.714539 0 4
GO:0043549 0.713862 3 2
GO:0010507 0.713774 1 4
GO:0040018 0.713171 0 5
GO:0038084 0.712982 1 2
GO:0050900 0.712940 3 2
GO:0048873 0.712279 0 2
GO:0090263 0.712002 0 5
GO:0072655 0.711364 1 3
GO:0043966 0.711157 2 1
GO:0010467 0.711069 5 1
GO:2000773 0.711008 0 3
GO:0016572 0.710407 1 2
GO:0033138 0.710145 1 3
GO:0046578 0.709704 2 2
GO:0036324 0.709154 0 1
GO:0046631 0.708211 3 1
GO:0051321 0.707683 3 2
GO:0032008 0.707223 1 4
GO:0046651 0.707067 2 2
GO:0007346 0.707006 3 2
GO:0061029 0.706909 0 3
GO:0071230 0.706594 1 4
GO:0002720 0.705912 1 11
GO:0010629 0.705797 3 4
GO:0034765 0.705101 4 5
GO:0035726 0.704893 0 1
GO:0010613 0.704696 1 3
GO:0045321 0.704539 5 2
GO:0031929 0.704054 2 1
GO:0043270 0.703842 3 3
GO:0019222 0.703719 7 1
GO:0006939 0.703424 2 1
GO:0046628 0.703369 0 6
GO:0061024 0.703188 2 1
GO:0060391 0.702866 0 6
GO:0051261 0.702830 2 1
GO:0006811 0.701553 6 1
GO:0008360 0.701514 0 3
GO:0060384 0.701453 1 2
GO:0055003 0.700996 0 8
GO:0007268 0.700781 4 1
GO:0010628 0.699521 3 4
GO:0006139 0.699478 6 2
GO:0006468 0.699029 5 4
GO:0060179 0.698636 0 1
GO:0048568 0.698434 3 2
GO:0043244 0.697851 2 2
GO:0051894 0.697683 0 7
GO:0006469 0.697613 2 14
GO:0034329 0.697061 2 1
GO:0007204 0.696417 4 4
GO:1903829 0.696396 3 3
GO:0072006 0.696198 2 2
GO:0032355 0.695494 1 3
GO:0048266 0.695194 0 3
GO:0008354 0.694969 0 3
GO:0045785 0.694047 3 2
GO:0009056 0.693798 5 1
GO:0048598 0.693287 4 2
GO:0034644 0.692308 1 2
GO:0048469 0.692299 2 2
GO:0051223 0.692235 3 5
GO:0065003 0.692223 4 1
GO:0000278 0.692038 4 1
GO:0031103 0.691333 1 4
GO:0007267 0.691304 5 1
GO:0048017 0.690794 1 1
GO:0055082 0.690784 5 2
GO:0051901 0.690122 0 4
GO:0051258 0.690099 3 1
GO:0001541 0.689675 1 4
GO:0006897 0.689347 3 1
GO:0001932 0.688849 4 7
GO:0070507 0.688415 2 4
GO:0050865 0.687711 5 1
GO:0007155 0.687639 5 1
GO:0060326 0.687327 2 3
GO:0048608 0.687212 2 3
GO:0035019 0.686954 1 1
GO:0009966 0.686876 5 3
GO:0050728 0.686281 2 3
GO:1903053 0.686275 1 2
GO:0050866 0.686189 2 2
GO:0051881 0.686055 1 1
GO:0050872 0.685780 0 1
GO:0008286 0.683527 1 2
GO:0007015 0.683431 3 2
GO:0045840 0.683252 1 9
GO:1902074 0.683239 1 1
GO:0035306 0.682977 1 3
GO:0099111 0.682737 3 2
GO:0019221 0.681500 2 3
GO:0035264 0.681434 1 2
GO:0010595 0.681090 2 5
GO:0006936 0.680440 3 1
GO:0051496 0.679907 0 10
GO:0002764 0.679018 3 2
GO:0010811 0.677971 1 3
GO:0035022 0.677728 0 7
GO:0008630 0.677216 2 2
GO:0099173 0.676905 2 2
GO:0051897 0.676314 0 3
GO:0061045 0.676257 2 3
GO:0030001 0.676160 5 1
GO:0051281 0.675909 1 14
GO:0009888 0.675500 4 1
GO:0016192 0.675421 4 1
GO:0009582 0.675325 2 1
GO:0010821 0.675098 2 2
GO:0016358 0.675073 3 2
GO:2000757 0.675000 1 7
GO:0006687 0.674981 2 4
GO:0048863 0.674715 2 1
GO:0001946 0.674123 0 3
GO:0010952 0.673571 2 8
GO:0010727 0.672727 0 4
GO:0009653 0.672628 5 1
GO:0030097 0.672283 4 3
GO:0001837 0.672128 2 2
GO:0060341 0.671960 3 3
GO:0048741 0.671918 1 5
GO:0032410 0.671818 1 3
GO:0051726 0.671467 5 2
GO:0033554 0.670452 4 2
GO:0070662 0.670073 0 1
GO:0050918 0.670060 1 2
GO:0032835 0.669444 1 2
GO:0050678 0.668997 3 2
GO:0001553 0.668960 0 5
GO:0018105 0.668010 2 5
GO:0050795 0.667848 2 2
GO:0001817 0.667273 3 4
GO:0046889 0.666977 1 7
GO:0007389 0.666190 3 2
GO:0009887 0.666023 4 2
GO:0030154 0.665761 6 1
GO:0033077 0.665433 1 1
GO:0007420 0.665270 4 3
GO:0019216 0.664614 3 3
GO:0031032 0.664097 2 2
GO:0009058 0.664086 5 1
GO:0051898 0.663949 0 3
GO:0090090 0.663876 0 3
GO:0043123 0.663876 0 3
GO:0019827 0.663747 2 1
GO:0070663 0.663664 2 1
GO:0003300 0.663430 2 1
GO:0051049 0.663173 5 2
GO:0002573 0.663121 3 2
GO:0006644 0.663072 3 2
GO:1904707 0.661818 0 3
GO:0003338 0.661761 1 3
GO:0071478 0.661745 2 2
GO:0051128 0.661709 5 1
GO:0006325 0.661626 3 2
GO:0042063 0.661539 3 2
GO:0016070 0.661175 5 2
GO:0120035 0.661093 3 2
GO:0001975 0.659722 0 1
GO:0030539 0.659695 0 4
GO:0044255 0.658997 4 2
GO:0071310 0.658915 4 3
GO:0001570 0.658593 1 4
GO:0030182 0.658548 5 2
GO:0048146 0.658343 0 2
GO:0007005 0.658279 3 1
GO:0006096 0.658105 1 8
GO:1902532 0.657998 3 3
GO:0030855 0.657575 3 2
GO:0008584 0.657556 1 4
GO:2000352 0.657355 0 5
GO:0007435 0.657077 1 3
GO:0001779 0.656818 1 4
GO:0031109 0.656514 2 1
GO:0010592 0.655909 0 4
GO:0009743 0.655745 2 2
GO:0007416 0.655581 1 3
GO:0009725 0.655496 4 2
GO:0033619 0.655455 1 1
GO:0051649 0.655238 4 2
GO:0048589 0.655086 4 1
GO:0050870 0.654628 2 12
GO:0016579 0.654628 1 5
GO:0000122 0.654533 1 11
GO:0042110 0.654528 4 1
GO:0051092 0.654527 0 2
GO:0006606 0.654343 1 9
GO:0035265 0.653738 2 2
GO:1903078 0.653463 1 7
GO:0051403 0.653410 2 3
GO:0048167 0.653211 2 2
GO:0072239 0.653040 0 5
GO:0051641 0.652254 5 1
GO:0001755 0.652116 1 5
GO:0071887 0.651442 2 1
GO:1902036 0.651376 0 3
GO:0010632 0.651373 3 3
GO:0046330 0.651285 0 9
GO:0040008 0.650683 3 1
GO:0008610 0.650369 3 3
GO:0009266 0.650026 2 1
GO:0045944 0.649968 2 11
GO:0032516 0.648208 0 12
GO:0071333 0.648163 1 6
GO:0043536 0.647631 1 4
GO:0051924 0.647554 3 3
GO:1901135 0.647328 4 1
GO:0060571 0.647196 1 1
GO:0043524 0.646824 1 5
GO:0007423 0.646708 3 1
GO:0043473 0.646277 2 1
GO:0006470 0.646247 3 4
GO:0050910 0.646230 0 4
GO:1901031 0.646116 1 3
GO:0007275 0.645970 7 1
GO:0030335 0.645099 3 3
GO:0050808 0.645046 3 1
GO:0006915 0.644861 5 1
GO:0022414 0.644656 4 1
GO:0031663 0.644545 1 2
GO:0030521 0.643731 1 3
GO:0007219 0.642491 1 1
GO:0009968 0.641423 4 4
GO:0035556 0.641296 4 1
GO:0098609 0.640468 4 1
GO:0007399 0.639791 6 2
GO:0048714 0.639690 0 9
GO:0002244 0.639418 2 2
GO:0002320 0.639126 1 1
GO:0044281 0.639080 5 1
GO:0007584 0.639050 3 2
GO:0034766 0.639001 1 7
GO:0051247 0.638756 4 5
GO:1904062 0.638507 3 3
GO:2000278 0.637619 1 7
GO:0051174 0.637419 6 2
GO:0045165 0.637385 3 2
GO:0042593 0.636878 2 1
GO:0007166 0.636669 4 1
GO:0007265 0.636452 3 1
GO:0051209 0.635222 3 11
GO:0002548 0.634638 1 2
GO:0010508 0.634557 2 4
GO:0050821 0.634304 0 1
GO:0034605 0.634286 1 3
GO:0045740 0.632009 1 3
GO:0008284 0.631881 2 1
GO:0045055 0.631843 2 2
GO:0002443 0.631592 4 1
GO:0016477 0.631051 4 1
GO:0006355 0.630857 4 9
GO:0048565 0.630000 1 2
GO:0002274 0.629372 2 1
GO:0071549 0.627954 0 8
GO:0042325 0.627694 5 2
GO:0097021 0.627451 0 1
GO:0051050 0.627173 4 3
GO:0003376 0.627083 0 2
GO:0006954 0.626960 3 1
GO:0050673 0.626506 4 1
GO:1904646 0.626432 0 4
GO:0048149 0.625382 0 3
GO:0045596 0.624389 2 3
GO:0006357 0.624002 3 8
GO:0050776 0.623673 4 4
GO:0001822 0.622805 3 2
GO:0071222 0.622348 2 6
GO:0070933 0.621818 0 1
GO:0046486 0.621699 3 1
GO:0043542 0.621690 3 2
GO:0048878 0.621353 6 1
GO:1902275 0.621331 1 2
GO:1901300 0.620545 0 9
GO:0031016 0.620271 2 1
GO:0000302 0.619605 2 1
GO:0001763 0.619510 2 2
GO:0043161 0.619160 2 7
GO:1900272 0.618401 0 3
GO:0048011 0.618401 1 3
GO:0042752 0.618397 2 2
GO:0006338 0.618121 2 1
GO:0010033 0.617717 5 1
GO:0033690 0.616364 0 2
GO:0000226 0.616277 3 2
GO:0035304 0.616266 2 7
GO:0051145 0.615603 2 2
GO:2000251 0.615194 0 5
GO:0071300 0.615024 0 4
GO:0070997 0.614605 2 1
GO:0033157 0.614122 1 6
GO:0009410 0.613962 2 1
GO:0008277 0.613914 2 2
GO:0070527 0.612601 1 2
GO:0019752 0.612179 4 2
GO:0006909 0.612093 2 1
GO:0030072 0.611897 3 3
GO:0021795 0.611157 1 4
GO:0008064 0.610543 2 4
GO:0010921 0.610538 2 3
GO:1905564 0.609854 0 3
GO:0097191 0.608829 3 2
GO:0006412 0.608810 4 6
GO:0030705 0.608111 3 3
GO:0060976 0.607966 1 3
GO:0002682 0.607900 5 2
GO:0007517 0.607632 3 1
GO:0032869 0.606959 2 7
GO:0006886 0.606403 3 6
GO:2001241 0.606335 0 4
GO:0060374 0.605392 0 1
GO:0007612 0.604907 1 2
GO:0010941 0.604825 5 1
GO:0030183 0.604706 1 4
GO:0014070 0.604053 4 1
GO:1903578 0.603642 1 2
GO:0002862 0.602844 1 7
GO:0051098 0.602127 3 1
GO:0006511 0.601368 3 6
GO:0051341 0.601133 2 1
GO:0042551 0.601132 1 3
GO:0098586 0.600000 1 2
GO:0045637 0.599805 2 6
GO:0002250 0.599773 4 2
GO:0006898 0.598760 2 1
GO:0034767 0.598730 2 7
GO:0071417 0.598465 3 4
GO:0006810 0.597926 7 1
GO:0051171 0.597884 6 2
GO:0006629 0.597410 5 1
GO:0045766 0.596975 1 4
GO:0050890 0.596859 2 1
GO:0043065 0.596390 2 3
GO:0043066 0.595845 4 3
GO:0055085 0.595677 6 2
GO:0002376 0.595288 6 1
GO:0034599 0.594821 3 3
GO:0007283 0.594381 2 2
GO:0051353 0.593750 1 3
GO:1900087 0.593301 0 10
GO:0043467 0.592435 2 2
GO:0051146 0.592403 2 2
GO:0060840 0.592220 2 2
GO:0021953 0.591719 2 4
GO:0043534 0.591560 2 1
GO:0060416 0.590920 1 1
GO:0033365 0.590514 3 3
GO:0030217 0.590422 3 4
GO:0042391 0.590414 3 1
GO:0030168 0.589207 2 2
GO:2001020 0.587943 2 3
GO:0031175 0.586994 4 3
GO:0043170 0.586341 7 1
GO:0046034 0.585555 2 1
GO:0060997 0.585305 1 6
GO:2001214 0.585183 0 3
GO:0001818 0.585168 1 4
GO:0007411 0.584390 2 7
GO:0001942 0.583333 1 3
GO:0001819 0.582512 2 4
GO:0032148 0.582440 0 1
GO:0048638 0.581889 2 3
GO:0050804 0.581178 3 3
GO:0051702 0.580092 2 1
GO:0048468 0.579902 5 2
GO:0032922 0.579511 0 5
GO:0060644 0.578683 0 3
GO:0003158 0.578678 2 1
GO:0033141 0.578180 0 6
GO:0006914 0.577035 4 2
GO:0045821 0.576588 0 16
GO:0048557 0.576058 0 7
GO:0032388 0.575792 1 6
GO:0060485 0.574925 3 2
GO:0034446 0.574457 1 3
GO:0008104 0.573826 5 1
GO:0019318 0.572492 2 2
GO:0043547 0.571554 1 2
GO:0035249 0.570909 1 1
GO:0045732 0.570720 2 8
GO:0051046 0.570516 4 2
GO:0009416 0.570369 2 1
GO:0040016 0.568947 0 2
GO:0007596 0.568801 3 2
GO:0030900 0.568727 3 2
GO:0051494 0.568682 2 5
GO:2000377 0.567752 2 3
GO:0050731 0.564365 2 3
GO:0030218 0.563725 1 4
GO:0032024 0.563600 1 12
GO:0043114 0.563304 1 1
GO:0031333 0.562814 2 4
GO:0042180 0.558869 3 2
GO:0030162 0.558832 3 6
GO:0033044 0.558828 2 2
GO:0007507 0.558717 4 2
GO:0042307 0.557237 0 17
GO:0046718 0.557236 1 2
GO:0000723 0.556190 1 4
GO:0050770 0.555122 2 8
GO:0033043 0.554784 4 2
GO:0008016 0.554545 3 1
GO:0034394 0.553704 1 3
GO:0006281 0.553333 2 4
GO:0032467 0.552727 0 6
GO:0051047 0.552411 3 3
GO:0048661 0.551442 1 3
GO:0018107 0.550802 1 5
GO:0023019 0.550078 0 4
GO:0009165 0.550021 3 5
GO:0043154 0.549805 1 4
GO:0050920 0.549434 2 2
GO:0002685 0.546958 2 5
GO:0120162 0.546258 0 2
GO:0035860 0.546003 0 1
GO:0033689 0.545455 0 2
GO:0097067 0.545429 0 3
GO:0045597 0.544127 3 3
GO:0043303 0.542609 1 8
GO:0048103 0.542392 0 1
GO:0006997 0.537152 2 1
GO:0016570 0.536582 4 4
GO:0007610 0.536043 3 1
GO:0006508 0.532807 4 3
GO:0009259 0.531674 3 3
GO:0046890 0.531163 2 4
GO:0048511 0.530805 3 1
GO:0050729 0.529915 1 3
GO:0002821 0.529412 2 3
GO:0008285 0.529191 3 1
GO:0030522 0.529167 2 1
GO:0000724 0.528909 1 2
GO:0060749 0.527903 0 1
GO:0030041 0.527316 2 2
GO:0001525 0.527054 2 3
GO:0046474 0.525346 2 5
GO:0090314 0.524961 0 14
GO:0009306 0.524257 3 4
GO:0009617 0.522448 3 2
GO:0050790 0.522120 4 1
GO:0071407 0.520746 3 2
GO:0045833 0.519718 2 4
GO:0001938 0.519459 1 3
GO:0007156 0.518641 1 1
GO:0045793 0.517558 0 1
GO:0010976 0.517231 1 4
GO:0005975 0.516369 4 1
GO:0007269 0.515212 2 6
GO:0043392 0.513571 1 2
GO:0038096 0.512566 0 5
GO:0001764 0.511669 1 3
GO:0016241 0.511429 2 5
GO:0010243 0.511047 4 2
GO:0007173 0.510875 2 1
GO:0048704 0.510703 2 5
GO:0016567 0.509046 3 4
GO:0036092 0.507680 0 1
GO:0034504 0.506947 2 1
GO:0032092 0.505356 1 2
GO:0005984 0.504359 0 2
GO:0007568 0.504167 1 1
GO:0023061 0.503536 4 2
GO:0090050 0.503297 0 5
GO:1904659 0.502831 1 2
GO:0090042 0.502698 1 4
GO:0032729 0.501241 0 4
GO:0051928 0.499339 2 3
GO:0060627 0.498530 3 3
GO:0002064 0.498000 2 2
GO:0046488 0.497770 2 2
GO:0030032 0.495934 1 1
GO:0042113 0.495851 3 1
GO:0045739 0.494860 1 10
GO:0033627 0.494545 2 1
GO:0032409 0.493643 3 1
GO:0043086 0.492164 3 2
GO:0008542 0.490291 0 4
GO:0002684 0.489269 4 3
GO:0007422 0.487158 2 3
GO:1902904 0.487132 2 3
GO:0008202 0.485860 3 2
GO:0001649 0.485847 1 2
GO:0017157 0.485501 2 5
GO:1905563 0.484813 0 3
GO:0035051 0.484392 2 2
GO:0007160 0.484360 2 1
GO:0022407 0.483493 3 3
GO:0045087 0.482222 3 3
GO:1902903 0.481612 3 2
GO:0031398 0.479421 2 8
GO:0030838 0.479091 0 11
GO:0051497 0.479070 0 8
GO:0060048 0.478199 2 2
GO:0045471 0.476308 1 2
GO:0042220 0.475867 1 3
GO:0051640 0.475636 3 1
GO:0060173 0.475105 1 2
GO:0001889 0.475105 1 2
GO:0007165 0.474850 6 2
GO:0033002 0.474593 2 1
GO:0050863 0.472539 3 2
GO:0071383 0.471842 2 6
GO:0019233 0.470455 1 1
GO:0006417 0.468983 3 9
GO:0036473 0.467985 2 2
GO:0042093 0.467572 1 6
GO:0031529 0.467572 1 1
GO:0035754 0.467572 0 1
GO:0007159 0.467399 3 1
GO:0003014 0.467106 2 1
GO:0060079 0.466856 1 5
GO:0046677 0.463263 1 1
GO:0043254 0.462665 3 3
GO:0007179 0.462000 1 4
GO:0006457 0.461538 1 1
GO:0060395 0.461364 1 2
GO:0061351 0.461187 3 1
GO:2000270 0.461036 0 4
GO:0050852 0.459040 1 4
GO:0048738 0.459003 2 2
GO:0002683 0.458222 3 3
GO:2000300 0.457866 1 14
GO:0048709 0.456667 2 5
GO:0015031 0.455872 4 3
GO:0002819 0.455158 3 2
GO:1900407 0.453919 2 4
GO:0090068 0.453882 2 4
GO:1900180 0.453814 1 3
GO:0002218 0.453729 1 6
GO:0050680 0.453056 2 3
GO:0045429 0.451983 0 5
GO:0016071 0.451190 4 1
GO:0032743 0.448319 0 4
GO:0060560 0.448152 3 2
GO:0050778 0.447781 3 5
GO:0048484 0.447727 0 3
GO:0016236 0.447288 3 1
GO:0043525 0.445941 0 5
GO:0032091 0.445413 1 2
GO:0060291 0.445386 1 4
GO:0006163 0.444193 3 3
GO:0006694 0.442936 2 4
GO:0051000 0.442791 0 2
GO:0009755 0.441333 2 4
GO:0006753 0.441130 4 3
GO:0070301 0.439854 1 4
GO:0002040 0.439390 1 1
GO:0050896 0.439380 7 1
GO:0032760 0.438914 0 4
GO:0007626 0.438411 2 1
GO:0021987 0.437111 2 2
GO:0048675 0.436696 2 8
GO:0043434 0.435676 3 3
GO:0006401 0.435035 3 6
GO:0034405 0.432857 1 1
GO:0045907 0.431957 0 2
GO:0043124 0.430166 0 3
GO:0006352 0.430046 3 7
GO:0051017 0.428718 1 2
GO:0090280 0.428611 0 3
GO:0015980 0.428571 3 1
GO:0042310 0.428571 1 1
GO:0060045 0.426694 0 11
GO:0031397 0.424357 1 7
GO:0045347 0.424091 0 4
GO:0042632 0.423328 0 1
GO:0031056 0.421552 2 6
GO:0010951 0.420635 2 5
GO:0051249 0.420140 4 3
GO:0051090 0.418732 2 2
GO:0044262 0.415971 3 2
GO:0032879 0.414881 6 1
GO:0007162 0.414634 2 2
GO:0021766 0.412587 1 3
GO:0030282 0.407743 1 2
GO:0099504 0.406512 2 2
GO:0003007 0.401092 3 2
GO:0033143 0.396899 1 4
GO:0007158 0.395676 0 1
GO:0046942 0.395260 3 2
GO:0007229 0.394841 1 1
GO:0031334 0.393614 2 4
GO:0097009 0.391034 0 1
GO:0060021 0.388435 1 1
GO:0060041 0.386626 1 3
GO:0097190 0.384703 4 2
GO:0030048 0.384615 2 1
GO:0006298 0.384370 0 1
GO:0016042 0.384148 2 3
GO:0030101 0.382547 2 1
GO:0034103 0.381289 1 1
GO:0070102 0.380000 0 3
GO:0033628 0.377171 1 3
GO:0001894 0.372563 2 1
GO:0030512 0.371145 0 8
GO:0007498 0.367991 3 1
GO:0008361 0.367423 2 1
GO:0046620 0.365476 1 3
GO:0031295 0.364518 0 8
GO:0001892 0.355979 1 5
GO:0055119 0.353947 1 1
GO:0032436 0.353421 1 14
GO:0006919 0.352725 1 3
GO:0060444 0.347273 1 8
GO:0060740 0.331825 1 6
GO:0046854 0.326276 1 2
GO:0019722 0.322272 2 1
GO:0016485 0.318636 3 5
GO:0051302 0.314545 1 2
GO:0006869 0.299648 3 2
GO:0000077 0.284021 1 3
Code
import plotly.express as px

# Box plot of the per-GO-term AUC-ROC values in GO_terms_auc_svm_df, with one
# box (and one color taken from `c`) per level of the GO hierarchy.
c = ['#E8384F', '#FD817D', '#FDAE33',
     '#EECC16', '#A4C61A', '#37A862', "#208EA3", "#3B6EAB"]

fig = px.box(
    GO_terms_auc_svm_df, x="levels", y="auc",
    color="levels",
    color_discrete_sequence=c,
    width=600,
    height=400,
    template="simple_white",
    labels=dict(levels="Level of GO hierarchy", auc="AUC-ROC"),
)
fig.update_traces(width=0.9)

# Dotted horizontal reference line at AUC = 0.7; xref="paper" makes it span
# the full plot width independently of the x-axis values.
fig.add_shape(
    type="line", line_color="salmon", line_width=3, opacity=1, line_dash="dot",
    x0=0, x1=1, xref="paper", y0=0.7, y1=0.7, yref="y"
)

fig.update_layout(
    # Bold title: the opening <b> tag has to be matched by a closing </b>.
    title=dict(text="<b> AUC value grouped by level of GO hierarchy </b>",
               x=0.5,
               y=0.9,
               font=dict(size=18),
               xanchor='center',
               yanchor='top'),
    # The x tick labels are hidden; the legend identifies the levels instead.
    xaxis=dict(ticks="", showticklabels=False, showgrid=False, zeroline=False),
    yaxis=dict(ticks="", showticklabels=True, showgrid=True, zeroline=False),
    legend=dict(x=1.1, y=1, orientation="v", font=dict(size=11)),
    paper_bgcolor='rgba(0,0,0,0)',
    font=dict(family='Roboto', color="#36382E", size=15)
)

fig.show()
# Export a high-resolution PNG (scale=8) next to the other results.
pio.write_image(fig, resultsdir+"AUC_levels.png", width=600, height=400, scale=8)

TOP 15 PREDICTED GO TERMS

Code
# IDs (index labels) of the 15 GO terms with the highest AUC.
top15goterms = np.array(
    GO_terms_auc_svm_df.sort_values(by=["auc"], ascending=False).index[0:15]
)

Get Top GO term names

Code
# Select the rows of real_go_info that describe the top GO terms.
# The lookup is done on "<GO id>_1" keys, and the "_1" is stripped again
# afterwards so that GO_term matches the plain IDs used elsewhere.
top15goterms_1 = [goterm + "_" + str(1) for goterm in top15goterms]

# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of real_go_info (this is what raised
# SettingWithCopyWarning).
real_go_info_mod_best = real_go_info[real_go_info.GO_term.isin(top15goterms_1)].copy()
# regex=False: "_1" is a literal substring, not a pattern.
real_go_info_mod_best["GO_term"] = real_go_info_mod_best["GO_term"].str.replace("_1", "", regex=False)
C:\Users\ksada\AppData\Local\Temp\8\ipykernel_21800\3048861904.py:5: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
Code
# Top-15 rows by AUC, with the GO-term index turned into a regular column.
top15goterms_auc = (
    GO_terms_auc_svm_df
    .sort_values(by=["auc"], ascending=False)
    .iloc[0:15]
    .reset_index()
)
top15goterms_auc.columns = ["GO_term", "auc", "levels", "parents"]
Code
# Attach the extra columns of real_go_info_mod_best to the top-15 table,
# joining on the GO-term ID.
top15_go_info_rows = real_go_info_mod_best[real_go_info_mod_best["GO_term"].isin(top15goterms)]
top15goterms_auc = top15goterms_auc.merge(top15_go_info_rows, on="GO_term")
Code
# Display the top-15 table (bare expression: rendered as the cell output).
top15goterms_auc
GO_term auc levels parents Name layer_number
0 GO:0036289 0.999708 0 2 Peptidyl-serine autophosphorylation (1) 0.0
1 GO:0060440 0.994743 0 4 Trachea formation (1) 0.0
2 GO:0042149 0.971292 0 1 Cellular response to glucose starvation (1) 0.0
3 GO:1902455 0.969545 0 2 Negative regulation of stem cell population maintenance (1) 0.0
4 GO:0001556 0.965979 0 6 Oocyte maturation (1) 0.0
5 GO:0045636 0.955115 0 6 Positive regulation of melanocyte differentiation (1) 0.0
6 GO:0010750 0.955000 0 4 Positive regulation of nitric oxide mediated signal transduction (1) 0.0
7 GO:0060020 0.949434 0 1 Bergmann glial cell differentiation (1) 0.0
8 GO:1902042 0.945804 0 4 Negative regulation of extrinsic apoptotic signaling pathway via death domain receptors (1) 0.0
9 GO:1902236 0.941667 0 12 Negative regulation of endoplasmic reticulum stress-induced intrinsic apoptotic signaling pathway (1) 0.0
10 GO:0070059 0.936432 1 2 Intrinsic apoptotic signaling pathway in response to endoplasmic reticulum stress (1) 1.0
11 GO:0051453 0.935521 1 2 Regulation of intracellular ph (1) 1.0
12 GO:0042659 0.931364 0 3 Regulation of cell fate specification (1) 0.0
13 GO:0006360 0.930046 2 7 Transcription by rna polymerase i (1) 2.0
14 GO:0006959 0.921730 2 2 Humoral immune response (1) 2.0

WORST 15 PREDICTED GO TERMS

Code
# IDs (index labels) of the 15 GO terms with the lowest AUC.
worst15goterms = np.array(
    GO_terms_auc_svm_df.sort_values(by=["auc"], ascending=True).index[0:15]
)

Get Worst GO term names

Code
# Select the rows of real_go_info that describe the worst GO terms.
# The lookup is done on "<GO id>_1" keys, and the "_1" is stripped again
# afterwards so that GO_term matches the plain IDs used elsewhere.
worst15goterms_1 = [goterm + "_" + str(1) for goterm in worst15goterms]

# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of real_go_info (this is what raised
# SettingWithCopyWarning).
real_go_info_mod_worst = real_go_info[real_go_info.GO_term.isin(worst15goterms_1)].copy()
# regex=False: "_1" is a literal substring, not a pattern.
real_go_info_mod_worst["GO_term"] = real_go_info_mod_worst["GO_term"].str.replace("_1", "", regex=False)
C:\Users\ksada\AppData\Local\Temp\8\ipykernel_21800\988537561.py:5: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
Code
# Bottom-15 rows by AUC, with the GO-term index turned into a regular column.
worst15goterms_auc = (
    GO_terms_auc_svm_df
    .sort_values(by=["auc"], ascending=True)
    .iloc[0:15]
    .reset_index()
)
worst15goterms_auc.columns = ["GO_term", "auc", "levels", "parents"]
Code
# Show the worst-15 table joined with the extra columns of
# real_go_info_mod_worst (the merged frame is displayed, not stored).
worst15_go_info_rows = real_go_info_mod_worst[real_go_info_mod_worst["GO_term"].isin(worst15goterms)]
worst15goterms_auc.merge(worst15_go_info_rows, on="GO_term")
GO_term auc levels parents Name layer_number
0 GO:0000077 0.284021 1 3 Dna damage checkpoint signaling (1) 1.0
1 GO:0006869 0.299648 3 2 Lipid transport (1) 3.0
2 GO:0051302 0.314545 1 2 Regulation of cell division (1) 1.0
3 GO:0016485 0.318636 3 5 Protein processing (1) 3.0
4 GO:0019722 0.322272 2 1 Calcium-mediated signaling (1) 2.0
5 GO:0046854 0.326276 1 2 Phosphatidylinositol phosphate biosynthetic process (1) 1.0
6 GO:0060740 0.331825 1 6 Prostate gland epithelium morphogenesis (1) 1.0
7 GO:0060444 0.347273 1 8 Branching involved in mammary gland duct morphogenesis (1) 1.0
8 GO:0006919 0.352725 1 3 Activation of cysteine-type endopeptidase activity involved in apoptotic process (1) 1.0
9 GO:0032436 0.353421 1 14 Positive regulation of proteasomal ubiquitin-dependent protein catabolic process (1) 1.0
10 GO:0055119 0.353947 1 1 Relaxation of cardiac muscle (1) 1.0
11 GO:0001892 0.355979 1 5 Embryonic placenta development (1) 1.0
12 GO:0031295 0.364518 0 8 T cell costimulation (1) 0.0
13 GO:0046620 0.365476 1 3 Regulation of organ growth (1) 1.0
14 GO:0008361 0.367423 2 1 Regulation of cell size (1) 2.0

AUPR histogram

Code
# Turn the {GO term: AUPR} mapping into a frame indexed by GO term and drop
# the terms without a value.
aupr_svm_records = list(GO_terms_aupr_svm.items())
GO_terms_aupr_svm_df = (
    pd.DataFrame(aupr_svm_records, columns=['goterm', 'aupr'])
    .set_index("goterm")
    .dropna()
)
# Preview the five best terms.
GO_terms_aupr_svm_df.sort_values(by=["aupr"], ascending=False).head()
aupr
goterm
GO:0036289 0.996209
GO:0006807 0.945077
GO:0050896 0.921869
GO:0043170 0.909722
GO:0009058 0.900903
Code
# Annotate every GO term of the AUPR table with
#   - "levels":  level_number[term] - 1
#   - "parents": the number of incoming edges of the term in dG
aupr_go_terms = GO_terms_aupr_svm_df.index
number_parents = {term: len(list(dG.in_edges(term))) for term in aupr_go_terms}
levels = {term: level_number[term] - 1 for term in aupr_go_terms}

# One-column frames indexed by GO term, so they line up with the AUPR table.
levels = pd.DataFrame.from_dict(levels, orient='index')
number_parents = pd.DataFrame.from_dict(number_parents, orient='index')

GO_terms_aupr_svm_df = pd.concat(
    [GO_terms_aupr_svm_df, levels, number_parents],
    axis=1,
)
GO_terms_aupr_svm_df.columns = ["aupr", "levels", "parents"]
Code
# Box plot of the per-GO-term AUPR values in GO_terms_aupr_svm_df, with one
# box (and one color taken from `c`) per level of the GO hierarchy.
c = ['#E8384F', '#FD817D', '#FDAE33',
     '#EECC16', '#A4C61A', '#37A862', "#208EA3", "#3B6EAB"]

fig = px.box(
    GO_terms_aupr_svm_df, x="levels", y="aupr",
    color="levels",
    color_discrete_sequence=c,
    width=600,
    height=400,
    template="simple_white",
    labels=dict(levels="Level of GO hierarchy", aupr="AUPR"),
)
fig.update_traces(width=0.9)

# Dotted horizontal reference line at AUPR = 0.7; xref="paper" makes it span
# the full plot width independently of the x-axis values.
fig.add_shape(
    type="line", line_color="salmon", line_width=3, opacity=1, line_dash="dot",
    x0=0, x1=1, xref="paper", y0=0.7, y1=0.7, yref="y"
)

fig.update_layout(
    # Bold title: the opening <b> tag has to be matched by a closing </b>.
    title=dict(text="<b> AUPR value grouped by level of GO hierarchy </b>",
               x=0.5,
               y=0.9,
               font=dict(size=18),
               xanchor='center',
               yanchor='top'),
    # The x tick labels are hidden; the legend identifies the levels instead.
    xaxis=dict(ticks="", showticklabels=False, showgrid=False, zeroline=False),
    yaxis=dict(ticks="", showticklabels=True, showgrid=True, zeroline=False),
    legend=dict(x=1.1, y=1, orientation="v", font=dict(size=11)),
    paper_bgcolor='rgba(0,0,0,0)',
    font=dict(family='Roboto', color="#36382E", size=15)
)

fig.show()
# Export a high-resolution PNG (scale=8) next to the other results.
pio.write_image(fig, resultsdir+"AUPR_levels.png", width=600, height=400, scale=8)

Example prediction

Code
def f2(goterm):
    """Return *goterm* unchanged.

    Identity callback: whatever value is passed in is handed straight back
    to the caller.
    """
    chosen_goterm = goterm
    return chosen_goterm
Code
# Combobox listing every GO term, best AUC first; f2 returns the chosen term.
go_term_options = list(GO_terms_auc_svm_df.sort_values(by=["auc"], ascending=False).index)
combobox_go = interactive(f2, goterm=widgets.Combobox(options=go_term_options))

Choose GO term to study…

Code
# Render the GO-term combobox so that a term can be picked interactively.
display(combobox_go)
Code
# Read the value returned by the widget callback (f2), i.e. the chosen GO term.
# NOTE(review): this captures the selection at execution time — re-run the
# cell after changing the combobox.
selected_go = combobox_go.result
Code
# ROC curve and AUC of the platt_matrix scores for the selected GO term.
plt.rcParams['figure.figsize'] = (4, 2)
selected_go_labels = slim_matrix_single_neuron.loc[selected_go]
selected_go_platt = platt_matrix.loc[selected_go]

fpr, tpr, _ = metrics.roc_curve(selected_go_labels, selected_go_platt)
auc = metrics.roc_auc_score(selected_go_labels, selected_go_platt)
plt.plot(fpr, tpr, label="data 1, auc="+str(auc))
plt.legend(loc=4)
plt.show()

# Box plot of the scores, split by the label value in slim_matrix_single_neuron.
plot = pd.concat(
    [pd.DataFrame(selected_go_labels), pd.DataFrame(selected_go_platt)],
    axis=1,
)
plot.columns = ["slim", "probability"]
ax = sns.boxplot(x="slim", y="probability", data=plot, showfliers=False)

Code
# ROC curve and AUC of the delta_logits_matrix values for the selected GO term.
selected_go_truth = slim_matrix_single_neuron.loc[selected_go]
selected_go_delta_logits = delta_logits_matrix.loc[selected_go]

fpr, tpr, _ = metrics.roc_curve(selected_go_truth, selected_go_delta_logits)
auc = metrics.roc_auc_score(selected_go_truth, selected_go_delta_logits)
plt.plot(fpr, tpr, label="data 1, auc="+str(auc))
plt.legend(loc=4)
plt.show()

# Box plot of the values, split by the label in slim_matrix_single_neuron.
# The y column is named "probability" although it holds delta-logit values.
plot = pd.concat(
    [pd.DataFrame(selected_go_truth), pd.DataFrame(selected_go_delta_logits)],
    axis=1,
)
plot.columns = ["slim", "probability"]
ax = sns.boxplot(x="slim", y="probability", data=plot, showfliers=False)

Code
# Confusion matrix and summary metrics of the predictions in preds_svm_matrix
# for the selected GO term.
plt.rcParams['figure.figsize'] = (2, 2)
selected_go_true = slim_matrix_single_neuron.loc[selected_go]
selected_go_pred = preds_svm_matrix.loc[selected_go]

metrics.ConfusionMatrixDisplay.from_predictions(selected_go_true, selected_go_pred)
plt.grid(visible=None)

print("Accuracy:", metrics.accuracy_score(selected_go_true, selected_go_pred))
print("Precision:", metrics.precision_score(selected_go_true, selected_go_pred))  # TP / (TP+FP)
print("Recall:", metrics.recall_score(selected_go_true, selected_go_pred))  # TP / (TP+FN)
# `auc` is not computed in this cell: it is the value left behind by the most
# recently executed ROC cell.
print("AUC with score:", auc)
Accuracy: 0.9782608695652174
Precision: 0.7894736842105263
Recall: 0.9375
AUC with score: 0.9947429906542057

TN - FP

FN - TP

Code
# Precision-recall curve and its area for the selected GO term.
# The curve is computed from the continuous scores in platt_matrix (the same
# input the per-drug AUPR computation uses). Feeding it the predictions in
# preds_svm_matrix, which are also used for accuracy and the confusion matrix,
# would reduce the curve to a single operating point.
plt.rcParams['figure.figsize'] = (4, 2)
precision, recall, thresholds = metrics.precision_recall_curve(
    slim_matrix_single_neuron.loc[selected_go],
    platt_matrix.loc[selected_go],
)
auc_precision_recall = metrics.auc(recall, precision)
plt.plot(recall, precision, label=str(auc_precision_recall))
plt.legend(loc=4)
plt.show()

METRICS drugs

Code
# Per-drug metrics: every column of slim_matrix_single_neuron is one drug.
# AUC-ROC and AUPR are computed from the platt_matrix scores, precision from
# the predictions in preds_svm_matrix.
auc_drugs = {}
aupr_drugs = {}
precision_drugs = {}
for drug in slim_matrix_single_neuron.columns:
    drug_truth = slim_matrix_single_neuron.loc[:, drug]
    # Skip drugs whose label column sums to zero.
    if drug_truth.sum() == 0:
        continue
    drug_platt = platt_matrix.loc[:, drug]
    auc_drugs[drug] = metrics.roc_auc_score(drug_truth, drug_platt)
    precision, recall, thresholds = metrics.precision_recall_curve(drug_truth, drug_platt)
    aupr_drugs[drug] = metrics.auc(recall, precision)
    precision_drugs[drug] = metrics.precision_score(drug_truth, preds_svm_matrix.loc[:, drug])


def _drug_metric_frame(values, metric_name):
    # One-column frame indexed by the dict keys, without missing values.
    # The index keeps the name "goterm" although the keys are drugs.
    frame = pd.DataFrame(list(values.items()), columns=['goterm', metric_name])
    return frame.set_index("goterm").dropna()


auc_drugs_df = _drug_metric_frame(auc_drugs, 'auc')

aupr_drugs_df = _drug_metric_frame(aupr_drugs, 'aupr')

precision_drugs_df = _drug_metric_frame(precision_drugs, 'precision')

AUC histogram drugs

Code
# Histogram of the per-drug AUC values; bins starting above 0.7 are highlighted.
sns.set(rc={'figure.figsize':(10,6)})
fig, ax = plt.subplots()

# Share of drugs with AUC > 0.7, as a percentage string.
drugs_above_auc_cutoff = auc_drugs_df[auc_drugs_df["auc"]>0.7]
perc = str(round((100*len(drugs_above_auc_cutoff)/len(auc_drugs_df)),2))+"%"
N, bins, patches = plt.hist(auc_drugs_df, color=CB_color_cycle[6], bins=50, linewidth=0.1)

# Recolor every bar whose left bin edge lies above the cut-off.
for left_edge, patch in zip(bins[:-1], patches):
    if left_edge > 0.7:
        patch.set_facecolor(CB_color_cycle[5])

plt.yticks(fontsize=16)
plt.xticks(fontsize=16)

# Hide the top, right and left spines; keep a light gray bottom spine.
for spine_side in ('top', 'right', 'left'):
    ax.spines[spine_side].set_visible(False)
ax.spines['bottom'].set_color('#DDDDDD')

# Drop the x ticks, keep the y ticks.
ax.tick_params(bottom=False, left=True)

# Light gray horizontal grid drawn behind the bars; no vertical grid.
ax.set_axisbelow(True)
ax.yaxis.grid(True, color='#EEEEEE')
ax.xaxis.grid(False)

plt.xlabel("AUC value", fontsize=20)
plt.ylabel("Number of drugs", fontsize=20)

# Hand-made legend with a single colored rectangle, placed below the axes.
colors2 = {'Drugs with AUC>=0.7':CB_color_cycle[5]}
labels = list(colors2.keys())
handles = [plt.Rectangle((0,0),1,1, color=legend_color) for legend_color in colors2.values()]
plt.legend(handles, labels, fontsize=20, loc="lower left", bbox_to_anchor=(0.35,-0.35))
plt.text(0.79, 6, str(perc), fontsize=20, color='#333333')
plt.title("Overall performance by drugs using mutations", fontsize=24)
# Author's note: what works best is the plain sum of the attribution.
fig.tight_layout()
fig.savefig(resultsdir+'drugsAUC.png', transparent=True)

AUC waterfall plot drugs

Code
# Order the drugs from highest to lowest AUC for the waterfall plot.
auc_drugs_df = auc_drugs_df.sort_values(
    by=["auc"],
    ascending=False,
)
Code
# Waterfall (bar) plot of the per-drug AUC-ROC values, in the row order of
# auc_drugs_df.
plt.rcParams['figure.figsize'] = (12, 9)
drugs = auc_drugs_df.index
rhos = auc_drugs_df["auc"]

# Percentage of drugs with AUC > 0.69; written onto the figure further down.
share_above_cutoff = sum(rhos>0.69)/len(rhos)
percentage = round(share_above_cutoff*100,1)

fig, ax = plt.subplots()
# Bar outline: gray below the 0.69 cut-off, purple otherwise.
colors = ['#C9C9C9' if (x < 0.69) else "#B678BE" for x in rhos ]
ax.bar(x=drugs, height=rhos, edgecolor=colors, linewidth=3)
plt.xticks([])
plt.yticks(fontsize=28)

# Remove all four spines (figure borders).
for spine_side in ('top', 'right', 'left', 'bottom'):
    ax.spines[spine_side].set_visible(False)

# No tick marks on either axis.
ax.tick_params(bottom=False, left=False)

# No grid lines at all.
ax.set_axisbelow(False)
ax.yaxis.grid(False)
ax.xaxis.grid(False)

# Axis labels; `labelpad` shifts them relative to the axes. The title is
# left empty.
ax.set_xlabel('Drugs', labelpad=-30, color='#333333', fontsize=50)
ax.set_ylabel('AUC-ROC value', labelpad=15, color='#333333', fontsize=50)
ax.set_title('', color='#333333', weight='bold')

# Legend entries are built here, but no legend is drawn.
colors2 = {'High confidence drugs (r>0.5)':'#A4C61A'}
labels = list(colors2.keys())
handles = [plt.Rectangle((0,0),1,1, color=legend_color) for legend_color in colors2.values()]
plt.text(77, 0.32, str(percentage)+"%", fontsize=60, color='#000000')

plt.ylim((-0.1,1.1))
# Make the chart fill out the figure better.
fig.tight_layout()
fig.savefig(resultsdir+'WaterfallModelsSVM_drugs.png', transparent=True)

AUPR histogram drugs

Code
# Histogram of the per-drug AUPR values; bins starting above 0.69 are
# highlighted and the share of drugs above 0.69 is used as the title.
sns.set(rc={'figure.figsize':(5,3)})
drugs_above_aupr_cutoff = aupr_drugs_df[aupr_drugs_df["aupr"]>0.69]
perc = str(round((100*len(drugs_above_aupr_cutoff)/len(aupr_drugs_df)),2))+"%"
N, bins, patches = plt.hist(aupr_drugs_df, color=CB_color_cycle[6], bins=50, linewidth=0.1)

# Recolor every bar whose left bin edge lies above the cut-off.
for left_edge, patch in zip(bins[:-1], patches):
    if left_edge > 0.69:
        patch.set_facecolor(CB_color_cycle[3])

plt.xlabel("AUPR drugs", fontsize=16)
plt.title(perc, fontsize=16)
Text(0.5, 1.0, '33.62%')

Example drug prediction

Code
def f(drug):
    """Return *drug* unchanged.

    Identity callback: whatever value is passed in is handed straight back
    to the caller.
    """
    chosen_drug = drug
    return chosen_drug
Code
# "<GO id>_1" keys for every GO term (row label) of platt_matrix.
predictions_nodes = [goterm + "_" + str(1) for goterm in list(platt_matrix.index)]
Code
# Add names to the GO terms: keep the real_go_info rows whose GO_term is one
# of the "<GO id>_1" keys in predictions_nodes, then strip the "_1" so that
# GO_term matches the plain IDs.
# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of real_go_info (which raises SettingWithCopyWarning).
real_go_info_svm = real_go_info[real_go_info.GO_term.isin(predictions_nodes)].copy()
# regex=False: "_1" is a literal substring, not a pattern.
real_go_info_svm["GO_term"] = real_go_info_svm["GO_term"].str.replace("_1", "", regex=False)
Code
# Combobox listing every drug, highest precision first; f returns the chosen drug.
drug_options = list(precision_drugs_df.sort_values(by=["precision"], ascending=False).index)
combobox = interactive(f, drug=widgets.Combobox(options=drug_options))

Choose drug to study…

Code
# Render the drug combobox so that a drug can be picked interactively.
display(combobox)
Code
# Read the value returned by the widget callback (f), i.e. the chosen drug.
# NOTE(review): this captures the selection at execution time — re-run the
# cell after changing the combobox.
selected_drug_name = combobox.result
Code
# ROC curve and AUC of the platt_matrix scores for the selected drug.
sns.set(rc={'figure.figsize':(4,2)})
selected_drug_labels = slim_matrix_single_neuron.loc[:, selected_drug_name]
selected_drug_scores = platt_matrix.loc[:, selected_drug_name]

fpr, tpr, _ = metrics.roc_curve(selected_drug_labels, selected_drug_scores)
auc = metrics.roc_auc_score(selected_drug_labels, selected_drug_scores)
plt.plot(fpr, tpr, label="data 1, auc="+str(auc))
plt.legend(loc=4)
plt.show()

# Box plot of the scores, split by the label value in slim_matrix_single_neuron.
plot = pd.concat(
    [pd.DataFrame(selected_drug_labels), pd.DataFrame(selected_drug_scores)],
    axis=1,
)
plot.columns = ["slim", "svm score"]
ax = sns.boxplot(x="slim", y="svm score", data=plot, showfliers=False)

Code
# Box plot of the selected drug's attribution values (multiplied by 1e4),
# split by the label in slim_matrix; outliers are shown.
selected_drug_slim = slim_matrix.loc[:, selected_drug_name]
selected_drug_attribution = attribution_data_annotated.loc[:, selected_drug_name]*1e4
plot = pd.concat(
    [pd.DataFrame(selected_drug_slim), pd.DataFrame(selected_drug_attribution)],
    axis=1,
)
plot.columns = ["slim", "attribution"]
ax = sns.boxplot(x="slim", y="attribution", data=plot, showfliers=True)

Code
# Confusion matrix of the predictions in preds_svm_matrix for the selected
# drug; the labels are rounded before being compared.
selected_drug_rounded_labels = slim_matrix_single_neuron.loc[:, selected_drug_name].round()
selected_drug_predictions = preds_svm_matrix.loc[:, selected_drug_name]
metrics.ConfusionMatrixDisplay.from_predictions(selected_drug_rounded_labels, selected_drug_predictions)
<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay at 0x2502863c8b0>

Code
# Summary metrics for the selected drug.
selected_drug_true = slim_matrix_single_neuron.loc[:, selected_drug_name]
selected_drug_pred = preds_svm_matrix.loc[:, selected_drug_name]

# Compute the AUC here, from the platt_matrix scores of the selected drug,
# instead of printing whatever value the global `auc` was last given by
# another cell (which can belong to a different drug or to a GO term).
auc = metrics.roc_auc_score(selected_drug_true, platt_matrix.loc[:, selected_drug_name])

print("Accuracy:", metrics.accuracy_score(selected_drug_true, selected_drug_pred))
print("Precision:", metrics.precision_score(selected_drug_true, selected_drug_pred))  # TP / (TP+FP)
print("Recall:", metrics.recall_score(selected_drug_true, selected_drug_pred))  # TP / (TP+FN)
print("AUC with score:", auc)
Accuracy: 0.663471778487753
Precision: 0.8651026392961877
Recall: 0.5221238938053098
AUC with score: 0.9947429906542057
Code
# NOTE: these are the TEST logits!
# Table of the selected drug's delta_logits_matrix values per GO term, joined
# with the extra columns of real_go_info_svm.
selected_drug_delta_logits = delta_logits_matrix.loc[:, selected_drug_name]
train_drug_logs = pd.DataFrame(selected_drug_delta_logits).reset_index()
# The value column is called "probability" although it holds delta logits.
train_drug_logs.columns = ["GO_term", "probability"]
train_drug_logs = train_drug_logs.merge(real_go_info_svm, on="GO_term")
# Display the table, highest value first.
train_drug_logs.sort_values(by=["probability"], ascending=False)
GO_term probability Name layer_number
578 GO:2000379 2.770126 Positive regulation of reactive oxygen species metabolic process (1) 1.0
253 GO:0043552 2.707570 Positive regulation of phosphatidylinositol 3-kinase activity (1) 0.0
80 GO:0010575 2.282492 Positive regulation of vascular endothelial growth factor production (1) 0.0
633 GO:0051301 2.245231 Cell division (1) 2.0
224 GO:0046777 2.053782 Protein autophosphorylation (1) 1.0
423 GO:1902533 2.014408 Positive regulation of intracellular signal transduction (1) 2.0
458 GO:0035025 1.952270 Positive regulation of rho protein signal transduction (1) 0.0
848 GO:0071670 1.887644 Smooth muscle cell chemotaxis (1) 0.0
348 GO:0006939 1.842256 Smooth muscle contraction (1) 2.0
98 GO:0001932 1.809520 Regulation of protein phosphorylation (1) 4.0
350 GO:0045987 1.783402 Positive regulation of smooth muscle contraction (1) 1.0
45 GO:0001501 1.682999 Skeletal system development (1) 4.0
908 GO:0051899 1.675960 Membrane depolarization (1) 2.0
653 GO:0072593 1.650121 Reactive oxygen species metabolic process (1) 3.0
115 GO:0060312 1.625762 Regulation of blood vessel remodeling (1) 0.0
926 GO:0060020 1.614859 Bergmann glial cell differentiation (1) 0.0
923 GO:0048170 1.598558 Positive regulation of long-term neuronal synaptic plasticity (1) 0.0
99 GO:0001934 1.593723 Positive regulation of protein phosphorylation (1) 3.0
853 GO:0038083 1.588712 Peptidyl-tyrosine autophosphorylation (1) 0.0
713 GO:0035726 1.577983 Common myeloid progenitor cell proliferation (1) 0.0
445 GO:0048008 1.552362 Platelet-derived growth factor receptor signaling pathway (1) 1.0
857 GO:0035584 1.496390 Calcium-mediated signaling using intracellular calcium source (1) 0.0
333 GO:1904019 1.481165 Epithelial cell apoptotic process (1) 1.0
933 GO:0051150 1.474264 Regulation of smooth muscle cell differentiation (1) 1.0
352 GO:0014827 1.453480 Intestine smooth muscle contraction (1) 0.0
218 GO:0006468 1.436484 Protein phosphorylation (1) 5.0
814 GO:0090037 1.416277 Positive regulation of protein kinase c signaling (1) 0.0
894 GO:0048017 1.399588 Inositol lipid-mediated signaling (1) 1.0
506 GO:0007286 1.376021 Spermatid development (1) 1.0
742 GO:0035733 1.371582 Hepatic stellate cell activation (1) 0.0
10 GO:0051403 1.369506 Stress-activated mapk cascade (1) 2.0
702 GO:0048146 1.349525 Positive regulation of fibroblast proliferation (1) 0.0
782 GO:1902042 1.347889 Negative regulation of extrinsic apoptotic signaling pathway via death domain receptors (1) 0.0
8 GO:0000165 1.328354 Mapk cascade (1) 3.0
531 GO:0007585 1.321161 Respiratory gaseous exchange by respiratory system (1) 1.0
492 GO:1905065 1.288165 Positive regulation of vascular associated smooth muscle cell differentiation (1) 0.0
181 GO:0006139 1.261437 Nucleobase-containing compound metabolic process (1) 6.0
670 GO:0031274 1.250440 Positive regulation of pseudopodium assembly (1) 0.0
424 GO:0030513 1.215934 Positive regulation of bmp signaling pathway (1) 0.0
12 GO:0070374 1.209229 Positive regulation of erk1 and erk2 cascade (1) 0.0
593 GO:0050865 1.191004 Regulation of cell activation (1) 5.0
562 GO:0050896 1.177718 Response to stimulus (1) 7.0
24 GO:0007346 1.156784 Regulation of mitotic cell cycle (1) 3.0
640 GO:0071310 1.131852 Cellular response to organic substance (1) 4.0
159 GO:0002548 1.126395 Monocyte chemotaxis (1) 1.0
142 GO:0002443 1.101635 Leukocyte mediated immunity (1) 4.0
141 GO:0050900 1.098856 Leukocyte migration (1) 3.0
583 GO:0032967 1.096638 Positive regulation of collagen biosynthetic process (1) 0.0
49 GO:0001569 1.083831 Branching involved in blood vessel morphogenesis (1) 0.0
437 GO:0007169 1.082604 Transmembrane receptor protein tyrosine kinase signaling pathway (1) 3.0
723 GO:0010921 1.079534 Regulation of phosphatase activity (1) 2.0
105 GO:0045860 1.071511 Positive regulation of protein kinase activity (1) 2.0
421 GO:0045747 1.070015 Positive regulation of notch signaling pathway (1) 0.0
39 GO:0008360 1.064064 Regulation of cell shape (1) 0.0
444 GO:0038084 1.058233 Vascular endothelial growth factor signaling pathway (1) 1.0
833 GO:0010467 1.038215 Gene expression (1) 5.0
365 GO:0090141 1.033165 Positive regulation of mitochondrial fission (1) 0.0
439 GO:0030509 1.027813 Bmp signaling pathway (1) 1.0
553 GO:0034394 1.017983 Protein localization to cell surface (1) 1.0
60 GO:0072210 1.009552 Metanephric nephron development (1) 1.0
427 GO:0046427 1.007946 Positive regulation of receptor signaling pathway via jak-stat (1) 1.0
684 GO:0051770 1.005599 Positive regulation of nitric-oxide synthase biosynthetic process (1) 0.0
309 GO:0034765 1.001361 Regulation of ion transmembrane transport (1) 4.0
655 GO:0008210 0.998843 Estrogen metabolic process (1) 1.0
486 GO:0048484 0.961996 Enteric nervous system development (1) 0.0
809 GO:0014068 0.961267 Positive regulation of phosphatidylinositol 3-kinase signaling (1) 0.0
441 GO:0007259 0.949624 Receptor signaling pathway via jak-stat (1) 2.0
825 GO:0036120 0.941171 Cellular response to platelet-derived growth factor stimulus (1) 0.0
667 GO:0035234 0.934130 Ectopic germ cell programmed cell death (1) 0.0
23 GO:0000278 0.933767 Mitotic cell cycle (1) 4.0
632 GO:1900006 0.924525 Positive regulation of dendrite development (1) 0.0
397 GO:0033627 0.910092 Cell adhesion mediated by integrin (1) 2.0
764 GO:0048701 0.908694 Embryonic cranial skeleton morphogenesis (1) 1.0
624 GO:0010628 0.896611 Positive regulation of gene expression (1) 3.0
862 GO:0035162 0.872366 Embryonic hemopoiesis (1) 1.0
11 GO:0043406 0.860510 Positive regulation of map kinase activity (1) 1.0
74 GO:0001817 0.857088 Regulation of cytokine production (1) 3.0
743 GO:0050918 0.840184 Positive chemotaxis (1) 1.0
829 GO:0010212 0.825924 Response to ionizing radiation (1) 1.0
922 GO:0036324 0.825686 Vascular endothelial growth factor receptor-2 signaling pathway (1) 0.0
895 GO:0070528 0.820011 Protein kinase c signaling (1) 1.0
664 GO:0051092 0.813750 Positive regulation of nf-kappab transcription factor activity (1) 0.0
27 GO:0045840 0.800332 Positive regulation of mitotic nuclear division (1) 1.0
827 GO:0010038 0.794352 Response to metal ion (1) 2.0
592 GO:0045595 0.785394 Regulation of cell differentiation (1) 4.0
263 GO:0010559 0.783904 Regulation of glycoprotein biosynthetic process (1) 1.0
503 GO:0021953 0.781212 Central nervous system neuron differentiation (1) 2.0
674 GO:0071276 0.780750 Cellular response to cadmium ion (1) 0.0
659 GO:0048469 0.780345 Cell maturation (1) 2.0
652 GO:0042180 0.773541 Cellular ketone metabolic process (1) 3.0
44 GO:0048812 0.771740 Neuron projection morphogenesis (1) 3.0
605 GO:0051901 0.770926 Positive regulation of mitochondrial depolarization (1) 0.0
155 GO:0030097 0.758070 Hemopoiesis (1) 4.0
306 GO:0043270 0.753238 Positive regulation of ion transport (1) 3.0
456 GO:0046578 0.747317 Regulation of ras protein signal transduction (1) 2.0
766 GO:0051145 0.741094 Smooth muscle cell differentiation (1) 2.0
379 GO:0032956 0.736770 Regulation of actin cytoskeleton organization (1) 3.0
898 GO:0035924 0.730231 Cellular response to vascular endothelial growth factor stimulus (1) 2.0
897 GO:0035767 0.729807 Endothelial cell chemotaxis (1) 1.0
817 GO:0010033 0.726109 Response to organic substance (1) 5.0
519 GO:0048839 0.725562 Inner ear development (1) 2.0
378 GO:0031532 0.722138 Actin cytoskeleton reorganization (1) 1.0
106 GO:0071900 0.721523 Regulation of protein serine/threonine kinase activity (1) 2.0
544 GO:0060179 0.712624 Male mating behavior (1) 0.0
763 GO:0060325 0.697674 Face morphogenesis (1) 0.0
277 GO:0016925 0.695177 Protein sumoylation (1) 1.0
328 GO:0043065 0.690749 Positive regulation of apoptotic process (1) 2.0
701 GO:0002053 0.689985 Positive regulation of mesenchymal cell proliferation (1) 0.0
77 GO:0002718 0.688320 Regulation of cytokine production involved in immune response (1) 2.0
691 GO:0043542 0.680928 Endothelial cell migration (1) 3.0
280 GO:0006810 0.677814 Transport (1) 7.0
239 GO:0006576 0.673930 Cellular biogenic amine metabolic process (1) 2.0
858 GO:0035019 0.669360 Somatic stem cell population maintenance (1) 1.0
347 GO:0006937 0.665317 Regulation of muscle contraction (1) 2.0
681 GO:2001257 0.650114 Regulation of cation channel activity (1) 2.0
711 GO:0019752 0.649156 Carboxylic acid metabolic process (1) 4.0
716 GO:0070662 0.644697 Mast cell proliferation (1) 0.0
509 GO:0060384 0.639217 Innervation (1) 1.0
900 GO:0042060 0.629958 Wound healing (1) 4.0
401 GO:0010811 0.627079 Positive regulation of cell-substrate adhesion (1) 1.0
388 GO:0051726 0.624874 Regulation of cell cycle (1) 5.0
144 GO:0031295 0.621763 T cell costimulation (1) 0.0
841 GO:0032355 0.617237 Response to estradiol (1) 1.0
126 GO:0002318 0.612203 Myeloid progenitor cell differentiation (1) 0.0
754 GO:0009888 0.609334 Tissue development (1) 4.0
430 GO:0019221 0.607804 Cytokine-mediated signaling pathway (1) 2.0
97 GO:0043129 0.607469 Surfactant homeostasis (1) 0.0
793 GO:0009725 0.604125 Response to hormone (1) 4.0
147 GO:0030218 0.603194 Erythrocyte differentiation (1) 1.0
675 GO:0071277 0.602376 Cellular response to calcium ion (1) 0.0
806 GO:0051056 0.599956 Regulation of small gtpase mediated signal transduction (1) 3.0
46 GO:0060348 0.599483 Bone development (1) 3.0
837 GO:0043536 0.595296 Positive regulation of blood vessel endothelial cell migration (1) 1.0
863 GO:0035855 0.595221 Megakaryocyte development (1) 0.0
803 GO:0042475 0.592086 Odontogenesis of dentin-containing tooth (1) 2.0
66 GO:0001824 0.588798 Blastocyst development (1) 1.0
495 GO:0060976 0.586013 Coronary vasculature development (1) 1.0
838 GO:0038033 0.585984 Positive regulation of endothelial cell chemotaxis by vegf-activated vascular endothelial growth factor receptor signaling pathway (1) 0.0
626 GO:0051649 0.585010 Establishment of localization in cell (1) 4.0
899 GO:0035994 0.581520 Response to muscle stretch (1) 1.0
398 GO:0045785 0.575910 Positive regulation of cell adhesion (1) 3.0
471 GO:0060045 0.574196 Positive regulation of cardiac muscle cell proliferation (1) 0.0
617 GO:0008354 0.569914 Germ cell migration (1) 0.0
331 GO:0071887 0.563795 Leukocyte apoptotic process (1) 2.0
508 GO:0042552 0.562630 Myelination (1) 2.0
47 GO:0048704 0.560470 Embryonic skeletal system morphogenesis (1) 2.0
386 GO:0007049 0.553817 Cell cycle (1) 6.0
346 GO:0006936 0.550409 Muscle contraction (1) 3.0
225 GO:0006470 0.541583 Protein dephosphorylation (1) 3.0
750 GO:0044281 0.539816 Small molecule metabolic process (1) 5.0
835 GO:0016239 0.538943 Positive regulation of macroautophagy (1) 1.0
219 GO:0006975 0.532966 Dna damage induced protein phosphorylation (1) 0.0
573 GO:0010629 0.530721 Negative regulation of gene expression (1) 3.0
354 GO:0050728 0.528698 Negative regulation of inflammatory response (1) 2.0
746 GO:0033327 0.520751 Leydig cell differentiation (1) 0.0
802 GO:0060766 0.520552 Negative regulation of androgen receptor signaling pathway (1) 0.0
249 GO:0019216 0.518909 Regulation of lipid metabolic process (1) 3.0
231 GO:0033619 0.516921 Membrane protein proteolysis (1) 1.0
428 GO:0007219 0.510598 Notch signaling pathway (1) 1.0
906 GO:0043549 0.506907 Regulation of kinase activity (1) 3.0
59 GO:0035788 0.505476 Cell migration involved in metanephros development (1) 0.0
708 GO:0050890 0.505448 Cognition (1) 2.0
468 GO:1903010 0.502791 Regulation of bone development (1) 0.0
912 GO:0043534 0.496543 Blood vessel endothelial cell migration (1) 2.0
148 GO:0050852 0.496154 T cell receptor signaling pathway (1) 1.0
639 GO:0060326 0.490465 Cell chemotaxis (1) 2.0
317 GO:0006897 0.488535 Endocytosis (1) 3.0
465 GO:0007275 0.485383 Multicellular organism development (1) 7.0
772 GO:0060485 0.484666 Mesenchyme development (1) 3.0
823 GO:0045471 0.483879 Response to ethanol (1) 1.0
373 GO:0030838 0.479914 Positive regulation of actin filament polymerization (1) 0.0
160 GO:0002573 0.478546 Myeloid leukocyte differentiation (1) 3.0
877 GO:0060437 0.473475 Lung growth (1) 0.0
245 GO:0006629 0.469312 Lipid metabolic process (1) 5.0
114 GO:0051894 0.466675 Positive regulation of focal adhesion assembly (1) 0.0
406 GO:0048041 0.466386 Focal adhesion assembly (1) 1.0
932 GO:0097021 0.465441 Lymphocyte migration into lymphoid organs (1) 0.0
339 GO:2000352 0.461676 Negative regulation of endothelial cell apoptotic process (1) 0.0
874 GO:1903053 0.452737 Regulation of extracellular matrix organization (1) 1.0
284 GO:0051050 0.451125 Positive regulation of transport (1) 4.0
13 GO:0046330 0.444601 Positive regulation of jnk cascade (1) 0.0
256 GO:0046474 0.443735 Glycerophospholipid biosynthetic process (1) 2.0
588 GO:0019222 0.443139 Regulation of metabolic process (1) 7.0
459 GO:0007267 0.428833 Cell-cell signaling (1) 5.0
919 GO:0046677 0.427086 Response to antibiotic (1) 1.0
462 GO:0007268 0.426575 Chemical synaptic transmission (1) 4.0
706 GO:0043547 0.426347 Positive regulation of gtpase activity (1) 1.0
660 GO:0045347 0.425159 Negative regulation of mhc class ii biosynthetic process (1) 0.0
290 GO:2000300 0.421019 Regulation of synaptic vesicle exocytosis (1) 1.0
791 GO:0009582 0.420489 Detection of abiotic stimulus (1) 2.0
907 GO:0051881 0.420291 Regulation of mitochondrial membrane potential (1) 1.0
517 GO:0043586 0.419816 Tongue development (1) 1.0
533 GO:0030168 0.419395 Platelet activation (1) 2.0
469 GO:0060173 0.411322 Limb development (1) 1.0
90 GO:0072284 0.410934 Metanephric s-shaped body morphogenesis (1) 0.0
780 GO:0046890 0.404557 Regulation of lipid biosynthetic process (1) 2.0
229 GO:0016485 0.400777 Protein processing (1) 3.0
446 GO:0048010 0.399575 Vascular endothelial growth factor receptor signaling pathway (1) 1.0
402 GO:0022407 0.398607 Regulation of cell-cell adhesion (1) 3.0
287 GO:0099111 0.397633 Microtubule-based transport (1) 3.0
194 GO:0006355 0.390046 Regulation of transcription, dna-templated (1) 4.0
913 GO:0090630 0.382248 Activation of gtpase activity (1) 0.0
122 GO:0002062 0.381242 Chondrocyte differentiation (1) 2.0
629 GO:0051174 0.380683 Regulation of phosphorus metabolic process (1) 6.0
668 GO:0010976 0.374728 Positive regulation of neuron projection development (1) 1.0
604 GO:0045088 0.374664 Regulation of innate immune response (1) 2.0
139 GO:0002684 0.374517 Positive regulation of immune system process (1) 4.0
250 GO:0044255 0.373685 Cellular lipid metabolic process (1) 4.0
163 GO:0002685 0.368480 Regulation of leukocyte migration (1) 2.0
369 GO:0055003 0.366810 Cardiac myofibril assembly (1) 0.0
265 GO:0051247 0.362984 Positive regulation of protein metabolic process (1) 4.0
795 GO:0043627 0.362212 Response to estrogen (1) 1.0
707 GO:0007202 0.359908 Activation of phospholipase c activity (1) 0.0
223 GO:0018108 0.359532 Peptidyl-tyrosine phosphorylation (1) 3.0
832 GO:0042220 0.359499 Response to cocaine (1) 1.0
715 GO:0061351 0.358465 Neural precursor cell proliferation (1) 3.0
873 GO:0002327 0.358081 Immature b cell differentiation (1) 0.0
16 GO:0046329 0.357586 Negative regulation of jnk cascade (1) 1.0
409 GO:0007166 0.352431 Cell surface receptor signaling pathway (1) 4.0
753 GO:1901135 0.352161 Carbohydrate derivative metabolic process (1) 4.0
69 GO:0001755 0.350614 Neural crest cell migration (1) 1.0
440 GO:0030512 0.350318 Negative regulation of transforming growth factor beta receptor signaling pathway (1) 0.0
563 GO:1900272 0.349758 Negative regulation of long-term synaptic potentiation (1) 0.0
650 GO:0034329 0.348915 Cell junction assembly (1) 2.0
230 GO:0030162 0.336130 Regulation of proteolysis (1) 3.0
175 GO:0005975 0.330773 Carbohydrate metabolic process (1) 4.0
627 GO:0035306 0.328011 Positive regulation of dephosphorylation (1) 1.0
400 GO:0007160 0.326200 Cell-matrix adhesion (1) 2.0
455 GO:0016601 0.324410 Rac protein signal transduction (1) 2.0
697 GO:0050678 0.322499 Regulation of epithelial cell proliferation (1) 3.0
71 GO:0060444 0.322093 Branching involved in mammary gland duct morphogenesis (1) 1.0
226 GO:0035304 0.320405 Regulation of protein dephosphorylation (1) 2.0
412 GO:0009966 0.306725 Regulation of signal transduction (1) 5.0
914 GO:0046486 0.298756 Glycerolipid metabolic process (1) 3.0
539 GO:0050795 0.293322 Regulation of behavior (1) 2.0
86 GO:0072006 0.288499 Nephron development (1) 2.0
337 GO:0043525 0.284469 Positive regulation of neuron apoptotic process (1) 0.0
625 GO:0051902 0.284041 Negative regulation of mitochondrial depolarization (1) 0.0
58 GO:0003338 0.282674 Metanephros morphogenesis (1) 1.0
811 GO:0043124 0.281702 Negative regulation of i-kappab kinase/nf-kappab signaling (1) 0.0
812 GO:0051897 0.280267 Positive regulation of protein kinase b signaling (1) 0.0
203 GO:0006163 0.279176 Purine nucleotide metabolic process (1) 3.0
543 GO:0007612 0.277571 Learning (1) 1.0
466 GO:0007389 0.275989 Pattern specification process (1) 3.0
883 GO:0032148 0.273937 Activation of protein kinase b activity (1) 0.0
483 GO:0035909 0.270466 Aorta morphogenesis (1) 1.0
755 GO:0016358 0.270034 Dendrite development (1) 3.0
307 GO:0034766 0.269283 Negative regulation of ion transmembrane transport (1) 1.0
513 GO:0030900 0.265273 Forebrain development (1) 3.0
479 GO:0048598 0.253519 Embryonic morphogenesis (1) 4.0
507 GO:0007416 0.252929 Synapse assembly (1) 1.0
860 GO:0051928 0.247690 Positive regulation of calcium ion transport (1) 2.0
651 GO:0050808 0.247245 Synapse organization (1) 3.0
890 GO:0071353 0.244414 Cellular response to interleukin-4 (1) 1.0
392 GO:0032467 0.243007 Positive regulation of cytokinesis (1) 0.0
695 GO:0015980 0.237863 Energy derivation by oxidation of organic compounds (1) 3.0
235 GO:0032436 0.237590 Positive regulation of proteasomal ubiquitin-dependent protein catabolic process (1) 1.0
560 GO:0043473 0.231280 Pigmentation (1) 2.0
463 GO:0035249 0.230532 Synaptic transmission, glutamatergic (1) 1.0
870 GO:0070527 0.229018 Platelet aggregation (1) 1.0
273 GO:0016575 0.227418 Histone deacetylation (1) 2.0
32 GO:0000422 0.224098 Autophagy of mitochondrion (1) 2.0
113 GO:0001952 0.223395 Regulation of cell-matrix adhesion (1) 1.0
9 GO:0043408 0.223296 Regulation of mapk cascade (1) 2.0
125 GO:0002244 0.223128 Hematopoietic progenitor cell differentiation (1) 2.0
132 GO:0002274 0.221454 Myeloid leukocyte activation (1) 2.0
869 GO:0030318 0.209787 Melanocyte differentiation (1) 1.0
0 GO:0000077 0.209594 Dna damage checkpoint signaling (1) 1.0
255 GO:0006644 0.209513 Phospholipid metabolic process (1) 3.0
645 GO:0071300 0.209465 Cellular response to retinoic acid (1) 0.0
729 GO:0120035 0.207325 Regulation of plasma membrane bounded cell projection organization (1) 3.0
796 GO:0097067 0.207250 Cellular response to thyroid hormone stimulus (1) 0.0
516 GO:0007423 0.206232 Sensory organ development (1) 3.0
410 GO:0007186 0.205939 G protein-coupled receptor signaling pathway (1) 3.0
722 GO:0042325 0.203618 Regulation of phosphorylation (1) 5.0
794 GO:0043434 0.202441 Response to peptide hormone (1) 3.0
88 GO:0090184 0.201141 Positive regulation of kidney development (1) 0.0
431 GO:0031663 0.200051 Lipopolysaccharide-mediated signaling pathway (1) 1.0
866 GO:0030101 0.198593 Natural killer cell activation (1) 2.0
38 GO:0000902 0.193896 Cell morphogenesis (1) 4.0
259 GO:0009259 0.192137 Ribonucleotide metabolic process (1) 3.0
703 GO:0048661 0.191886 Positive regulation of smooth muscle cell proliferation (1) 1.0
740 GO:0014911 0.191054 Positive regulation of smooth muscle cell migration (1) 1.0
714 GO:0050673 0.189629 Epithelial cell proliferation (1) 4.0
43 GO:0050770 0.188075 Regulation of axonogenesis (1) 2.0
381 GO:0008064 0.187642 Regulation of actin polymerization or depolymerization (1) 2.0
851 GO:0070933 0.185342 Histone h4 deacetylation (1) 0.0
762 GO:0060749 0.181899 Mammary gland alveolus development (1) 0.0
454 GO:0007266 0.181338 Rho protein signal transduction (1) 1.0
541 GO:0008542 0.180964 Visual learning (1) 0.0
499 GO:1990384 0.180877 Hyaloid vascular plexus regression (1) 0.0
320 GO:0006914 0.178279 Autophagy (1) 4.0
42 GO:0048675 0.176485 Axon extension (1) 2.0
234 GO:0010952 0.174912 Positive regulation of peptidase activity (1) 2.0
192 GO:0006352 0.168084 Dna-templated transcription, initiation (1) 3.0
448 GO:0008277 0.168080 Regulation of g protein-coupled receptor signaling pathway (1) 2.0
582 GO:1902459 0.162029 Positive regulation of stem cell population maintenance (1) 0.0
489 GO:0021766 0.161871 Hippocampus development (1) 1.0
154 GO:0050778 0.157385 Positive regulation of immune response (1) 3.0
187 GO:0071897 0.156799 Dna biosynthetic process (1) 2.0
7 GO:0000122 0.155105 Negative regulation of transcription by rna polymerase ii (1) 1.0
821 GO:0034097 0.153698 Response to cytokine (1) 3.0
124 GO:0002218 0.151919 Activation of innate immune response (1) 1.0
699 GO:0033689 0.151375 Negative regulation of osteoblast proliferation (1) 0.0
288 GO:0032940 0.151138 Secretion by cell (1) 5.0
854 GO:0070102 0.145028 Interleukin-6-mediated signaling pathway (1) 0.0
361 GO:0010638 0.143294 Positive regulation of organelle organization (1) 2.0
612 GO:0019827 0.141470 Stem cell population maintenance (1) 2.0
781 GO:0008625 0.139268 Extrinsic apoptotic signaling pathway via death domain receptors (1) 1.0
101 GO:0033141 0.136334 Positive regulation of peptidyl-serine phosphorylation of stat protein (1) 0.0
611 GO:1902904 0.136088 Negative regulation of supramolecular fiber organization (1) 2.0
643 GO:0071222 0.135913 Cellular response to lipopolysaccharide (1) 2.0
394 GO:0007059 0.133987 Chromosome segregation (1) 3.0
824 GO:0071363 0.126406 Cellular response to growth factor stimulus (1) 3.0
53 GO:0001570 0.126375 Vasculogenesis (1) 1.0
738 GO:0098586 0.123247 Cellular response to virus (1) 1.0
472 GO:0001553 0.119373 Luteinization (1) 0.0
109 GO:1905564 0.116462 Positive regulation of vascular endothelial cell proliferation (1) 0.0
709 GO:0019233 0.115463 Sensory perception of pain (1) 1.0
728 GO:0030032 0.113460 Lamellipodium assembly (1) 1.0
257 GO:0046488 0.111935 Phosphatidylinositol metabolic process (1) 2.0
313 GO:0051281 0.111192 Positive regulation of release of sequestered calcium ion into cytosol (1) 1.0
924 GO:0035754 0.107144 B cell chemotaxis (1) 0.0
658 GO:0048468 0.103878 Cell development (1) 5.0
545 GO:0008016 0.102922 Regulation of heart contraction (1) 3.0
532 GO:0007596 0.102694 Blood coagulation (1) 3.0
103 GO:0042531 0.102081 Positive regulation of tyrosine phosphorylation of stat protein (1) 0.0
938 GO:0007158 0.100508 Neuron cell-cell adhesion (1) 0.0
383 GO:0051497 0.100321 Negative regulation of stress fiber assembly (1) 0.0
57 GO:0001656 0.100188 Metanephros development (1) 2.0
83 GO:0002720 0.098565 Positive regulation of cytokine production involved in immune response (1) 1.0
382 GO:0030041 0.097715 Actin filament polymerization (1) 2.0
89 GO:0072239 0.093313 Metanephric glomerulus vasculature development (1) 0.0
180 GO:0019318 0.092513 Hexose metabolic process (1) 2.0
555 GO:1903078 0.092013 Positive regulation of protein localization to plasma membrane (1) 1.0
434 GO:0048011 0.091817 Neurotrophin trk receptor signaling pathway (1) 1.0
119 GO:0060740 0.091714 Prostate gland epithelium morphogenesis (1) 1.0
95 GO:0001894 0.091179 Tissue homeostasis (1) 2.0
661 GO:0046326 0.090707 Positive regulation of glucose import (1) 0.0
435 GO:0016055 0.090703 Wnt signaling pathway (1) 2.0
613 GO:0032922 0.089808 Circadian regulation of gene expression (1) 0.0
450 GO:0051209 0.087976 Release of sequestered calcium ion into cytosol (1) 3.0
477 GO:0040016 0.085668 Embryonic cleavage (1) 0.0
607 GO:0042391 0.085377 Regulation of membrane potential (1) 3.0
669 GO:0010592 0.084513 Positive regulation of lamellipodium assembly (1) 0.0
170 GO:0003007 0.084095 Heart morphogenesis (1) 3.0
269 GO:0031397 0.081637 Negative regulation of protein ubiquitination (1) 1.0
206 GO:0006298 0.080826 Mismatch repair (1) 0.0
31 GO:0070301 0.080430 Cellular response to hydrogen peroxide (1) 1.0
130 GO:0002819 0.080102 Regulation of adaptive immune response (1) 3.0
29 GO:1901031 0.077813 Regulation of response to reactive oxygen species (1) 1.0
242 GO:0090314 0.077607 Positive regulation of protein targeting to membrane (1) 0.0
417 GO:0060395 0.076956 Smad protein signal transduction (1) 1.0
710 GO:0055119 0.076428 Relaxation of cardiac muscle (1) 1.0
567 GO:0051641 0.073979 Cellular localization (1) 5.0
491 GO:0060440 0.069181 Trachea formation (1) 0.0
608 GO:0043114 0.069090 Regulation of vascular permeability (1) 1.0
190 GO:0045739 0.068981 Positive regulation of dna repair (1) 1.0
283 GO:0051049 0.068824 Regulation of transport (1) 5.0
595 GO:0051302 0.067812 Regulation of cell division (1) 1.0
91 GO:0001837 0.067510 Epithelial to mesenchymal transition (1) 2.0
110 GO:0001942 0.066958 Hair follicle development (1) 1.0
275 GO:0090042 0.066526 Tubulin deacetylation (1) 1.0
41 GO:0007411 0.065935 Axon guidance (1) 2.0
679 GO:0042310 0.063952 Vasoconstriction (1) 1.0
232 GO:0010951 0.062732 Negative regulation of endopeptidase activity (1) 2.0
865 GO:0030217 0.062144 T cell differentiation (1) 3.0
171 GO:0003014 0.057983 Renal system process (1) 2.0
501 GO:0007507 0.057228 Heart development (1) 4.0
70 GO:0001763 0.054372 Morphogenesis of a branching structure (1) 2.0
351 GO:0060048 0.052464 Cardiac muscle contraction (1) 2.0
121 GO:0090050 0.049572 Positive regulation of cell migration involved in sprouting angiogenesis (1) 0.0
169 GO:0002764 0.049060 Immune response-regulating signaling pathway (1) 3.0
630 GO:2000377 0.047289 Regulation of reactive oxygen species metabolic process (1) 2.0
537 GO:0007626 0.045891 Locomotory behavior (1) 2.0
799 GO:0009743 0.045252 Response to carbohydrate (1) 2.0
301 GO:0032388 0.044514 Positive regulation of intracellular transport (1) 1.0
40 GO:0034446 0.043394 Substrate adhesion-dependent cell spreading (1) 1.0
152 GO:0045087 0.042899 Innate immune response (1) 3.0
73 GO:0001779 0.041596 Natural killer cell differentiation (1) 1.0
212 GO:0051090 0.040406 Regulation of dna-binding transcription factor activity (1) 2.0
864 GO:0030183 0.038386 B cell differentiation (1) 1.0
705 GO:0051353 0.036453 Positive regulation of oxidoreductase activity (1) 1.0
587 GO:0050921 0.036116 Positive regulation of chemotaxis (1) 1.0
760 GO:0048286 0.035821 Lung alveolus development (1) 1.0
254 GO:0006687 0.035424 Glycosphingolipid metabolic process (1) 2.0
228 GO:0006508 0.033855 Proteolysis (1) 4.0
757 GO:0021987 0.030030 Cerebral cortex development (1) 2.0
673 GO:0043392 0.029313 Negative regulation of dna binding (1) 1.0
530 GO:0007584 0.028301 Response to nutrient (1) 3.0
712 GO:0033002 0.028034 Muscle cell proliferation (1) 2.0
792 GO:0060560 0.027744 Developmental growth involved in morphogenesis (1) 3.0
797 GO:0071383 0.026769 Cellular response to steroid hormone stimulus (1) 2.0
920 GO:0036092 0.025989 Phosphatidylinositol-3-phosphate biosynthetic process (1) 0.0
396 GO:0007162 0.024820 Negative regulation of cell adhesion (1) 2.0
498 GO:0061029 0.024413 Eyelid development in camera-type eye (1) 0.0
480 GO:0048565 0.023576 Digestive tract development (1) 1.0
51 GO:0045766 0.022903 Positive regulation of angiogenesis (1) 1.0
574 GO:0008285 0.020185 Negative regulation of cell population proliferation (1) 3.0
521 GO:0035051 0.019783 Cardiocyte differentiation (1) 2.0
205 GO:0045740 0.019424 Positive regulation of dna replication (1) 1.0
487 GO:0030325 0.018166 Adrenal gland development (1) 0.0
601 GO:0034103 0.018123 Regulation of tissue remodeling (1) 1.0
261 GO:0051171 0.014388 Regulation of nitrogen compound metabolic process (1) 6.0
570 GO:0045597 0.014362 Positive regulation of cell differentiation (1) 3.0
842 GO:0071407 0.014361 Cellular response to organic cyclic compound (1) 3.0
936 GO:0051000 0.014216 Positive regulation of nitric-oxide synthase activity (1) 0.0
461 GO:0050804 0.011722 Modulation of chemical synaptic transmission (1) 3.0
54 GO:2001214 0.010912 Positive regulation of vasculogenesis (1) 0.0
236 GO:0043161 0.009887 Proteasome-mediated ubiquitin-dependent protein catabolic process (1) 2.0
734 GO:0051702 0.006784 Biological process involved in interaction with symbiont (1) 2.0
717 GO:0046651 0.006589 Lymphocyte proliferation (1) 2.0
323 GO:0016236 0.006518 Macroautophagy (1) 3.0
158 GO:0002862 0.006214 Negative regulation of inflammatory response to antigenic stimulus (1) 1.0
619 GO:0050790 0.006102 Regulation of catalytic activity (1) 4.0
520 GO:0007498 0.005849 Mesoderm development (1) 3.0
136 GO:0002376 0.005563 Immune system process (1) 6.0
631 GO:0050920 0.005385 Regulation of chemotaxis (1) 2.0
403 GO:0033628 0.003111 Regulation of cell adhesion mediated by integrin (1) 1.0
345 GO:0006919 0.001149 Activation of cysteine-type endopeptidase activity involved in apoptotic process (1) 1.0
730 GO:0031175 0.000000 Neuron projection development (1) 4.0
304 GO:0046942 -0.001338 Carboxylic acid transport (1) 3.0
474 GO:0030539 -0.001885 Male genitalia development (1) 0.0
561 GO:0048511 -0.003138 Rhythmic process (1) 3.0
736 GO:0009617 -0.003666 Response to bacterium (1) 3.0
529 GO:0007568 -0.004377 Aging (1) 1.0
747 GO:0048878 -0.006747 Chemical homeostasis (1) 6.0
291 GO:0030072 -0.007065 Peptide hormone secretion (1) 3.0
580 GO:0045907 -0.009718 Positive regulation of vasoconstriction (1) 0.0
467 GO:0009791 -0.011347 Post-embryonic development (1) 1.0
935 GO:0048103 -0.011384 Somatic stem cell division (1) 0.0
657 GO:0045165 -0.012040 Cell fate commitment (1) 3.0
414 GO:0023019 -0.016733 Signal transduction involved in regulation of gene expression (1) 0.0
429 GO:0007229 -0.017112 Integrin-mediated signaling pathway (1) 1.0
312 GO:1904062 -0.018714 Regulation of cation transmembrane transport (1) 3.0
296 GO:0015031 -0.019362 Protein transport (1) 4.0
191 GO:0045944 -0.020718 Positive regulation of transcription by rna polymerase ii (1) 2.0
602 GO:0046620 -0.020955 Regulation of organ growth (1) 1.0
247 GO:0008610 -0.024022 Lipid biosynthetic process (1) 3.0
576 GO:0050866 -0.024773 Negative regulation of cell activation (1) 2.0
868 GO:0048863 -0.027968 Stem cell differentiation (1) 2.0
310 GO:0051924 -0.028544 Regulation of calcium ion transport (1) 3.0
332 GO:0097190 -0.028734 Apoptotic signaling pathway (1) 4.0
482 GO:0048557 -0.029065 Embryonic digestive tract morphogenesis (1) 0.0
262 GO:0045429 -0.029754 Positive regulation of nitric oxide biosynthetic process (1) 0.0
622 GO:0043086 -0.032086 Negative regulation of catalytic activity (1) 3.0
623 GO:0031334 -0.034770 Positive regulation of protein-containing complex assembly (1) 2.0
131 GO:0002821 -0.037160 Positive regulation of adaptive immune response (1) 2.0
618 GO:0032409 -0.037577 Regulation of transporter activity (1) 3.0
63 GO:0001666 -0.040150 Response to hypoxia (1) 2.0
196 GO:0006357 -0.041500 Regulation of transcription by rna polymerase ii (1) 3.0
270 GO:0031056 -0.041705 Regulation of histone modification (1) 2.0
733 GO:0030335 -0.043802 Positive regulation of cell migration (1) 3.0
93 GO:0001843 -0.044624 Neural tube closure (1) 1.0
370 GO:0033044 -0.044682 Regulation of chromosome organization (1) 2.0
408 GO:0007165 -0.044905 Signal transduction (1) 6.0
153 GO:0050776 -0.045236 Regulation of immune response (1) 4.0
227 GO:0032516 -0.046939 Positive regulation of phosphoprotein phosphatase activity (1) 0.0
217 GO:0006457 -0.046986 Protein folding (1) 1.0
415 GO:0030522 -0.048782 Intracellular receptor signaling pathway (1) 2.0
366 GO:0090201 -0.049666 Negative regulation of release of cytochrome c from mitochondria (1) 0.0
436 GO:0060079 -0.050457 Excitatory postsynaptic potential (1) 1.0
380 GO:0051017 -0.052351 Actin filament bundle assembly (1) 1.0
903 GO:0050863 -0.053759 Regulation of t cell activation (1) 3.0
78 GO:0032760 -0.054179 Positive regulation of tumor necrosis factor production (1) 0.0
188 GO:0009165 -0.055647 Nucleotide biosynthetic process (1) 3.0
904 GO:0042113 -0.058054 B cell activation (1) 3.0
138 GO:0002683 -0.058846 Negative regulation of immune system process (1) 3.0
826 GO:0071364 -0.059153 Cellular response to epidermal growth factor stimulus (1) 0.0
915 GO:0050872 -0.059361 White fat cell differentiation (1) 0.0
102 GO:0050731 -0.063114 Positive regulation of peptidyl-tyrosine phosphorylation (1) 2.0
133 GO:0043303 -0.063260 Mast cell degranulation (1) 1.0
494 GO:0060840 -0.069316 Artery development (1) 2.0
504 GO:0048709 -0.071150 Oligodendrocyte differentiation (1) 2.0
178 GO:0005984 -0.071330 Disaccharide metabolic process (1) 0.0
737 GO:0051607 -0.071636 Defense response to virus (1) 2.0
732 GO:0060997 -0.074427 Dendritic spine morphogenesis (1) 1.0
460 GO:0023061 -0.075653 Signal release (1) 4.0
67 GO:0001892 -0.076334 Embryonic placenta development (1) 1.0
724 GO:0036473 -0.088337 Cell death in response to oxidative stress (1) 2.0
447 GO:0007173 -0.090517 Epidermal growth factor receptor signaling pathway (1) 2.0
353 GO:0006954 -0.092872 Inflammatory response (1) 3.0
861 GO:0090280 -0.094645 Positive regulation of calcium ion import (1) 0.0
731 GO:0031529 -0.095254 Ruffle organization (1) 1.0
779 GO:0008544 -0.096671 Epidermis development (1) 3.0
901 GO:0042110 -0.098950 T cell activation (1) 4.0
690 GO:2001020 -0.101079 Regulation of response to dna damage stimulus (1) 2.0
360 GO:0007030 -0.102202 Golgi organization (1) 1.0
295 GO:0051047 -0.107145 Positive regulation of secretion (1) 3.0
786 GO:0034605 -0.107349 Cellular response to heat (1) 1.0
749 GO:0009058 -0.108343 Biosynthetic process (1) 5.0
688 GO:1900407 -0.109537 Regulation of cellular response to oxidative stress (1) 2.0
735 GO:0046718 -0.110283 Viral entry into host cell (1) 1.0
76 GO:0001819 -0.110409 Positive regulation of cytokine production (1) 2.0
50 GO:0002040 -0.110656 Sprouting angiogenesis (1) 1.0
692 GO:0010595 -0.112175 Positive regulation of endothelial cell migration (1) 2.0
419 GO:0003376 -0.112243 Sphingosine-1-phosphate receptor signaling pathway (1) 0.0
374 GO:2000251 -0.113035 Positive regulation of actin cytoskeleton reorganization (1) 0.0
845 GO:0016071 -0.114794 Mrna metabolic process (1) 4.0
589 GO:0042752 -0.116589 Regulation of circadian rhythm (1) 2.0
615 GO:0035265 -0.117147 Organ growth (1) 2.0
918 GO:0046854 -0.117793 Phosphatidylinositol phosphate biosynthetic process (1) 1.0
485 GO:0007422 -0.118904 Peripheral nervous system development (1) 2.0
81 GO:0032729 -0.121441 Positive regulation of interferon-gamma production (1) 0.0
591 GO:0010941 -0.121750 Regulation of cell death (1) 5.0
759 GO:0032835 -0.121813 Glomerulus development (1) 1.0
511 GO:0030182 -0.123468 Neuron differentiation (1) 5.0
260 GO:0006807 -0.123894 Nitrogen compound metabolic process (1) 7.0
859 GO:0021782 -0.127920 Glial cell development (1) 2.0
108 GO:0001938 -0.130000 Positive regulation of endothelial cell proliferation (1) 1.0
335 GO:2000270 -0.130342 Negative regulation of fibroblast apoptotic process (1) 0.0
162 GO:0060374 -0.130465 Mast cell differentiation (1) 0.0
87 GO:0072073 -0.132456 Kidney epithelium development (1) 2.0
183 GO:0006401 -0.132593 Rna catabolic process (1) 3.0
818 GO:0010243 -0.134405 Response to organonitrogen compound (1) 4.0
641 GO:0071417 -0.135033 Cellular response to organonitrogen compound (1) 3.0
464 GO:0060291 -0.136241 Long-term synaptic potentiation (1) 1.0
929 GO:0051341 -0.137416 Regulation of oxidoreductase activity (1) 2.0
581 GO:0120162 -0.138251 Positive regulation of cold-induced thermogenesis (1) 0.0
389 GO:0010564 -0.140360 Regulation of cell cycle process (1) 4.0
443 GO:0035860 -0.140553 Glial cell-derived neurotrophic factor receptor signaling pathway (1) 0.0
20 GO:0070507 -0.141310 Regulation of microtubule cytoskeleton organization (1) 2.0
453 GO:0007265 -0.142317 Ras protein signal transduction (1) 3.0
751 GO:0046034 -0.143417 Atp metabolic process (1) 2.0
3 GO:1900087 -0.145967 Positive regulation of g1/s transition of mitotic cell cycle (1) 0.0
214 GO:0006412 -0.146506 Translation (1) 4.0
566 GO:0032879 -0.149430 Regulation of localization (1) 6.0
671 GO:0032092 -0.152370 Positive regulation of protein binding (1) 1.0
761 GO:0060021 -0.155229 Roof of mouth development (1) 1.0
420 GO:2001241 -0.155496 Positive regulation of extrinsic apoptotic signaling pathway in absence of ligand (1) 0.0
405 GO:0007159 -0.155632 Leukocyte cell-cell adhesion (1) 3.0
358 GO:0007005 -0.155655 Mitochondrion organization (1) 3.0
210 GO:1902275 -0.156027 Regulation of chromatin organization (1) 1.0
311 GO:0030001 -0.158264 Metal ion transport (1) 5.0
810 GO:0043123 -0.158702 Positive regulation of i-kappab kinase/nf-kappab signaling (1) 0.0
168 GO:0045580 -0.161790 Regulation of t cell differentiation (1) 2.0
72 GO:0001764 -0.163226 Neuron migration (1) 1.0
776 GO:0060644 -0.163884 Mammary gland epithelial cell differentiation (1) 0.0
319 GO:0006909 -0.166043 Phagocytosis (1) 2.0
268 GO:0016570 -0.169677 Histone modification (1) 4.0
788 GO:0009410 -0.171927 Response to xenobiotic stimulus (1) 2.0
128 GO:1902036 -0.173697 Regulation of hematopoietic stem cell differentiation (1) 0.0
609 GO:0048167 -0.175411 Regulation of synaptic plasticity (1) 2.0
377 GO:0031032 -0.181549 Actomyosin structure organization (1) 2.0
438 GO:0007179 -0.182780 Transforming growth factor beta receptor signaling pathway (1) 1.0
28 GO:0000302 -0.183602 Response to reactive oxygen species (1) 2.0
745 GO:0009653 -0.188122 Anatomical structure morphogenesis (1) 5.0
395 GO:0007155 -0.188355 Cell adhesion (1) 5.0
535 GO:0050910 -0.188639 Detection of mechanical stimulus involved in sensory perception of sound (1) 0.0
14 GO:0043407 -0.189998 Negative regulation of map kinase activity (1) 1.0
636 GO:0030048 -0.190360 Actin filament-based movement (1) 2.0
909 GO:0042593 -0.191255 Glucose homeostasis (1) 2.0
546 GO:0008104 -0.192695 Protein localization (1) 5.0
289 GO:0007269 -0.196526 Neurotransmitter secretion (1) 2.0
885 GO:1900180 -0.200364 Regulation of protein localization to nucleus (1) 1.0
30 GO:1901300 -0.200519 Positive regulation of hydrogen peroxide-mediated programmed cell death (1) 0.0
739 GO:1902903 -0.203430 Regulation of supramolecular fiber organization (1) 3.0
847 GO:0045055 -0.204997 Regulated exocytosis (1) 2.0
299 GO:0030705 -0.206607 Cytoskeleton-dependent intracellular transport (1) 3.0
340 GO:2001234 -0.207987 Negative regulation of apoptotic signaling pathway (1) 3.0
849 GO:0043966 -0.208541 Histone h3 acetylation (1) 2.0
286 GO:0055085 -0.210703 Transmembrane transport (1) 6.0
363 GO:0051494 -0.211867 Negative regulation of cytoskeleton organization (1) 2.0
48 GO:0001525 -0.211963 Angiogenesis (1) 2.0
813 GO:0051898 -0.215146 Negative regulation of protein kinase b signaling (1) 0.0
700 GO:0033690 -0.223856 Positive regulation of osteoblast proliferation (1) 0.0
209 GO:0006338 -0.224254 Chromatin remodeling (1) 2.0
416 GO:0035556 -0.225162 Intracellular signal transduction (1) 4.0
916 GO:2000739 -0.225462 Regulation of mesenchymal stem cell differentiation (1) 0.0
79 GO:0032740 -0.225949 Positive regulation of interleukin-17 production (1) 0.0
572 GO:0034767 -0.226631 Positive regulation of ion transmembrane transport (1) 2.0
272 GO:0031398 -0.229217 Positive regulation of protein ubiquitination (1) 2.0
222 GO:0018107 -0.231930 Peptidyl-threonine phosphorylation (1) 1.0
879 GO:0031103 -0.232786 Axon regeneration (1) 1.0
413 GO:0009968 -0.233227 Negative regulation of signal transduction (1) 4.0
765 GO:0021795 -0.237208 Cerebral cortex cell migration (1) 1.0
325 GO:0006915 -0.240649 Apoptotic process (1) 5.0
172 GO:0003158 -0.242769 Endothelium development (1) 2.0
357 GO:0006997 -0.247765 Nucleus organization (1) 2.0
725 GO:0070997 -0.248013 Neuron death (1) 2.0
355 GO:0050729 -0.250210 Positive regulation of inflammatory response (1) 1.0
233 GO:0016579 -0.250907 Protein deubiquitination (1) 1.0
666 GO:0010727 -0.258854 Negative regulation of hydrogen peroxide metabolic process (1) 0.0
202 GO:0006753 -0.259533 Nucleoside phosphate metabolic process (1) 4.0
685 GO:0051973 -0.259761 Positive regulation of telomerase activity (1) 0.0
549 GO:0009306 -0.261559 Protein secretion (1) 3.0
778 GO:0045793 -0.270168 Positive regulation of cell size (1) 0.0
303 GO:1904659 -0.270188 Glucose transmembrane transport (1) 1.0
676 GO:0030282 -0.272104 Bone mineralization (1) 1.0
294 GO:0051046 -0.276779 Regulation of secretion (1) 4.0
362 GO:0033043 -0.280077 Regulation of organelle organization (1) 4.0
123 GO:0002064 -0.285938 Epithelial cell development (1) 2.0
282 GO:0016192 -0.287866 Vesicle-mediated transport (1) 4.0
678 GO:0071333 -0.289435 Cellular response to glucose stimulus (1) 1.0
523 GO:0007517 -0.293992 Muscle organ development (1) 3.0
150 GO:0002437 -0.297868 Inflammatory response to antigenic stimulus (1) 2.0
704 GO:1904707 -0.299166 Positive regulation of vascular associated smooth muscle cell proliferation (1) 0.0
771 GO:0031016 -0.299979 Pancreas development (1) 2.0
937 GO:0051640 -0.307828 Organelle localization (1) 3.0
107 GO:1905563 -0.308080 Negative regulation of vascular endothelial cell proliferation (1) 0.0
404 GO:0007156 -0.311359 Homophilic cell adhesion via plasma membrane adhesion molecules (1) 1.0
238 GO:0043162 -0.311445 Ubiquitin-dependent protein catabolic process via the multivesicular body sorting pathway (1) 1.0
752 GO:0043170 -0.315334 Macromolecule metabolic process (1) 7.0
552 GO:0033365 -0.319737 Protein localization to organelle (1) 3.0
881 GO:0031929 -0.320242 Tor signaling (1) 2.0
484 GO:0007399 -0.320323 Nervous system development (1) 6.0
129 GO:0002250 -0.320676 Adaptive immune response (1) 4.0
741 GO:0016477 -0.323694 Cell migration (1) 4.0
600 GO:0010632 -0.324795 Regulation of epithelial cell migration (1) 3.0
197 GO:0006367 -0.325397 Transcription initiation from rna polymerase ii promoter (1) 2.0
118 GO:0060571 -0.325595 Morphogenesis of an epithelial fold (1) 1.0
505 GO:0007283 -0.329296 Spermatogenesis (1) 2.0
542 GO:0048149 -0.329477 Behavioral response to ethanol (1) 0.0
391 GO:0090068 -0.333050 Positive regulation of cell cycle process (1) 2.0
656 GO:0030154 -0.339581 Cell differentiation (1) 6.0
698 GO:0070663 -0.341786 Regulation of leukocyte proliferation (1) 2.0
164 GO:0051249 -0.342420 Regulation of lymphocyte activation (1) 4.0
720 GO:0051261 -0.344762 Protein depolymerization (1) 2.0
475 GO:0050769 -0.344990 Positive regulation of neurogenesis (1) 2.0
165 GO:0050870 -0.345649 Positive regulation of t cell activation (1) 2.0
432 GO:0038007 -0.346961 Netrin-activated signaling pathway (1) 0.0
246 GO:0008202 -0.348429 Steroid metabolic process (1) 3.0
831 GO:0001975 -0.357673 Response to amphetamine (1) 0.0
177 GO:0045821 -0.361584 Positive regulation of glycolytic process (1) 0.0
127 GO:0002320 -0.363702 Lymphoid progenitor cell differentiation (1) 1.0
399 GO:0098609 -0.364601 Cell-cell adhesion (1) 4.0
599 GO:0017157 -0.372042 Regulation of exocytosis (1) 2.0
790 GO:0034644 -0.372186 Cellular response to uv (1) 1.0
568 GO:0008284 -0.372632 Positive regulation of cell population proliferation (1) 2.0
75 GO:0001818 -0.372931 Negative regulation of cytokine production (1) 1.0
610 GO:0031333 -0.373027 Negative regulation of protein-containing complex assembly (1) 2.0
770 GO:0009887 -0.378818 Animal organ morphogenesis (1) 4.0
18 GO:0000226 -0.382412 Microtubule cytoskeleton organization (1) 3.0
244 GO:0042307 -0.383034 Positive regulation of protein import into nucleus (1) 0.0
510 GO:0042063 -0.386958 Gliogenesis (1) 3.0
252 GO:0046889 -0.389004 Positive regulation of lipid biosynthetic process (1) 1.0
683 GO:0048477 -0.389075 Oogenesis (1) 1.0
686 GO:0097009 -0.390741 Energy homeostasis (1) 0.0
597 GO:0060627 -0.391978 Regulation of vesicle-mediated transport (1) 3.0
594 GO:0051128 -0.392248 Regulation of cellular component organization (1) 5.0
36 GO:0000723 -0.397902 Telomere maintenance (1) 1.0
579 GO:0040018 -0.399025 Positive regulation of multicellular organism growth (1) 0.0
55 GO:0001649 -0.403752 Osteoblast differentiation (1) 1.0
642 GO:0034599 -0.410954 Cellular response to oxidative stress (1) 3.0
318 GO:0006898 -0.412020 Receptor-mediated endocytosis (1) 2.0
518 GO:0042472 -0.414984 Inner ear morphogenesis (1) 1.0
816 GO:0032008 -0.416975 Positive regulation of tor signaling (1) 1.0
808 GO:1902532 -0.418085 Negative regulation of intracellular signal transduction (1) 3.0
135 GO:0042093 -0.420233 T-helper cell differentiation (1) 1.0
364 GO:0140013 -0.421435 Meiotic nuclear division (1) 2.0
536 GO:0007610 -0.428170 Behavior (1) 3.0
267 GO:0045732 -0.434807 Positive regulation of protein catabolic process (1) 2.0
789 GO:0009416 -0.439464 Response to light stimulus (1) 2.0
637 GO:0030198 -0.439683 Extracellular matrix organization (1) 2.0
82 GO:0032743 -0.458060 Positive regulation of interleukin-2 production (1) 0.0
694 GO:0032869 -0.463421 Cellular response to insulin stimulus (1) 2.0
646 GO:0071320 -0.466282 Cellular response to camp (1) 0.0
146 GO:0043029 -0.470875 T cell homeostasis (1) 1.0
911 GO:0060416 -0.475618 Response to growth hormone (1) 1.0
237 GO:0006511 -0.479298 Ubiquitin-dependent protein catabolic process (1) 3.0
819 GO:0014070 -0.480176 Response to organic cyclic compound (1) 4.0
896 GO:0097193 -0.483526 Intrinsic apoptotic signaling pathway (1) 3.0
134 GO:0002366 -0.488727 Leukocyte activation involved in immune response (1) 3.0
493 GO:0001946 -0.489050 Lymphangiogenesis (1) 0.0
689 GO:1905897 -0.495409 Regulation of response to endoplasmic reticulum stress (1) 2.0
478 GO:0048568 -0.497049 Embryonic organ development (1) 3.0
820 GO:0033993 -0.497561 Response to lipid (1) 3.0
934 GO:0051258 -0.500444 Protein polymerization (1) 3.0
800 GO:0030521 -0.503978 Androgen receptor signaling pathway (1) 1.0
276 GO:0016567 -0.510030 Protein ubiquitination (1) 3.0
663 GO:0050821 -0.512838 Protein stabilization (1) 0.0
768 GO:0051147 -0.515025 Regulation of muscle cell differentiation (1) 2.0
384 GO:0007018 -0.516240 Microtubule-based movement (1) 4.0
200 GO:0006281 -0.520134 Dna repair (1) 2.0
496 GO:0048608 -0.521065 Reproductive structure development (1) 2.0
921 GO:0035790 -0.529398 Platelet-derived growth factor receptor-alpha signaling pathway (1) 0.0
207 GO:0006303 -0.530286 Double-strand break repair via nonhomologous end joining (1) 1.0
548 GO:1903829 -0.534357 Positive regulation of protein localization (1) 3.0
889 GO:0098780 -0.534495 Response to mitochondrial depolarisation (1) 1.0
281 GO:0006811 -0.536119 Ion transport (1) 6.0
336 GO:0043524 -0.548174 Negative regulation of neuron apoptotic process (1) 1.0
184 GO:0006275 -0.549978 Regulation of dna replication (1) 2.0
96 GO:0048873 -0.558233 Homeostasis of number of cells within a tissue (1) 0.0
449 GO:0007204 -0.558363 Positive regulation of cytosolic calcium ion concentration (1) 4.0
744 GO:0055082 -0.561669 Cellular chemical homeostasis (1) 5.0
329 GO:0043066 -0.563158 Negative regulation of apoptotic process (1) 4.0
37 GO:0000724 -0.565085 Double-strand break repair via homologous recombination (1) 1.0
871 GO:0042551 -0.565380 Neuron maturation (1) 1.0
525 GO:0048741 -0.566143 Skeletal muscle fiber development (1) 1.0
557 GO:0072655 -0.572037 Establishment of protein localization to mitochondrion (1) 1.0
215 GO:0006417 -0.572163 Regulation of translation (1) 3.0
584 GO:0040008 -0.572873 Regulation of growth (1) 3.0
748 GO:0009056 -0.572913 Catabolic process (1) 5.0
556 GO:0051223 -0.574651 Regulation of protein transport (1) 3.0
433 GO:0097191 -0.576815 Extrinsic apoptotic signaling pathway (1) 3.0
502 GO:0007420 -0.577438 Brain development (1) 4.0
371 GO:0010821 -0.581599 Regulation of mitochondrion organization (1) 2.0
326 GO:0008637 -0.587399 Apoptotic mitochondrial changes (1) 1.0
767 GO:0051146 -0.588274 Striated muscle cell differentiation (1) 2.0
314 GO:0070588 -0.591082 Calcium ion transmembrane transport (1) 4.0
248 GO:0016042 -0.605006 Lipid catabolic process (1) 2.0
411 GO:0009755 -0.607054 Hormone-mediated signaling pathway (1) 2.0
182 GO:0016070 -0.607882 Rna metabolic process (1) 5.0
888 GO:0034976 -0.613362 Response to endoplasmic reticulum stress (1) 3.0
176 GO:0044262 -0.620829 Cellular carbohydrate metabolic process (1) 3.0
846 GO:0099504 -0.627336 Synaptic vesicle cycle (1) 2.0
528 GO:0007565 -0.627705 Female pregnancy (1) 2.0
211 GO:0031507 -0.627760 Heterochromatin assembly (1) 1.0
94 GO:0001889 -0.633760 Liver development (1) 1.0
52 GO:0001541 -0.634171 Ovarian follicle development (1) 1.0
137 GO:0002682 -0.635094 Regulation of immune system process (1) 5.0
620 GO:0051098 -0.635782 Regulation of binding (1) 3.0
266 GO:0030163 -0.652510 Protein catabolic process (1) 4.0
638 GO:0033554 -0.652740 Cellular response to stress (1) 4.0
777 GO:0050680 -0.661189 Negative regulation of epithelial cell proliferation (1) 2.0
425 GO:0060391 -0.664363 Positive regulation of smad protein signal transduction (1) 0.0
375 GO:0051496 -0.667875 Positive regulation of stress fiber assembly (1) 0.0
85 GO:0001823 -0.670272 Mesonephros development (1) 2.0
654 GO:0090398 -0.671529 Cellular senescence (1) 1.0
774 GO:0030216 -0.675144 Keratinocyte differentiation (1) 2.0
316 GO:0033157 -0.678469 Regulation of intracellular protein transport (1) 1.0
693 GO:1904646 -0.683366 Cellular response to amyloid-beta (1) 0.0
186 GO:2000278 -0.687722 Regulation of dna biosynthetic process (1) 1.0
418 GO:2001240 -0.689139 Negative regulation of extrinsic apoptotic signaling pathway in absence of ligand (1) 0.0
117 GO:0060562 -0.691717 Epithelial tube morphogenesis (1) 2.0
644 GO:0071230 -0.699092 Cellular response to amino acid stimulus (1) 1.0
298 GO:0006886 -0.703192 Intracellular protein transport (1) 3.0
473 GO:0008584 -0.709411 Male gonad development (1) 1.0
687 GO:0072384 -0.726414 Organelle transport along microtubule (1) 2.0
785 GO:0009266 -0.729968 Response to temperature stimulus (1) 2.0
116 GO:0002009 -0.733249 Morphogenesis of an epithelium (1) 3.0
867 GO:0045444 -0.735911 Fat cell differentiation (1) 2.0
564 GO:0045596 -0.736029 Negative regulation of cell differentiation (1) 2.0
804 GO:0030855 -0.743127 Epithelial cell differentiation (1) 3.0
887 GO:0034504 -0.747907 Protein localization to nucleus (1) 2.0
64 GO:0071456 -0.749105 Cellular response to hypoxia (1) 1.0
173 GO:0003300 -0.749363 Cardiac muscle hypertrophy (1) 2.0
422 GO:0046628 -0.749367 Positive regulation of insulin receptor signaling pathway (1) 0.0
783 GO:0008630 -0.759892 Intrinsic apoptotic signaling pathway in response to dna damage (1) 2.0
84 GO:0001822 -0.761027 Kidney development (1) 3.0
208 GO:0006325 -0.765908 Chromatin organization (1) 3.0
349 GO:0055118 -0.766833 Negative regulation of cardiac muscle contraction (1) 0.0
497 GO:0060041 -0.772237 Retina development in camera-type eye (1) 1.0
836 GO:1901987 -0.773018 Regulation of cell cycle phase transition (1) 3.0
850 GO:0070932 -0.775462 Histone h3 deacetylation (1) 0.0
522 GO:0048738 -0.777561 Cardiac muscle tissue development (1) 2.0
26 GO:1901990 -0.788841 Regulation of mitotic cell cycle phase transition (1) 2.0
606 GO:0048638 -0.795928 Regulation of developmental growth (1) 2.0
143 GO:0038096 -0.798475 Fc-gamma receptor signaling pathway involved in phagocytosis (1) 0.0
649 GO:0071478 -0.804830 Cellular response to radiation (1) 2.0
775 GO:0022612 -0.808633 Gland morphogenesis (1) 2.0
92 GO:0010718 -0.808642 Positive regulation of epithelial to mesenchymal transition (1) 1.0
534 GO:0030193 -0.809407 Regulation of blood coagulation (1) 2.0
185 GO:0051054 -0.817126 Positive regulation of dna metabolic process (1) 2.0
930 GO:1902074 -0.824018 Response to salt (1) 1.0
514 GO:0021575 -0.825947 Hindbrain morphogenesis (1) 1.0
876 GO:0046632 -0.839055 Alpha-beta t cell differentiation (1) 2.0
558 GO:0016032 -0.841189 Viral process (1) 3.0
490 GO:0007435 -0.854168 Salivary gland morphogenesis (1) 1.0
359 GO:0007010 -0.858980 Cytoskeleton organization (1) 4.0
718 GO:0008361 -0.861438 Regulation of cell size (1) 2.0
801 GO:0033143 -0.875603 Regulation of intracellular steroid hormone receptor signaling pathway (1) 1.0
179 GO:0006096 -0.888237 Glycolytic process (1) 1.0
157 GO:0048538 -0.899567 Thymus development (1) 0.0
886 GO:0034502 -0.908113 Protein localization to chromosome (1) 2.0
647 GO:0071392 -0.910313 Cellular response to estradiol stimulus (1) 0.0
457 GO:0035022 -0.913306 Positive regulation of rac protein signal transduction (1) 0.0
56 GO:0045668 -0.913637 Negative regulation of osteoblast differentiation (1) 0.0
843 GO:0014823 -0.918185 Response to activity (1) 1.0
376 GO:0007015 -0.919887 Actin filament organization (1) 3.0
166 GO:0030890 -0.921340 Positive regulation of b cell proliferation (1) 0.0
526 GO:0048743 -0.926052 Positive regulation of skeletal muscle fiber development (1) 0.0
21 GO:0051225 -0.929849 Spindle assembly (1) 2.0
596 GO:0060341 -0.932901 Regulation of cellular localization (1) 3.0
140 GO:0045321 -0.933341 Leukocyte activation (1) 5.0
855 GO:1903578 -0.938108 Regulation of atp metabolic process (1) 1.0
100 GO:0033138 -0.947197 Positive regulation of peptidyl-serine phosphorylation (1) 1.0
34 GO:1903146 -0.948778 Regulation of autophagy of mitochondrion (1) 1.0
120 GO:1905278 -0.954451 Positive regulation of epithelial tube formation (1) 0.0
726 GO:0065003 -0.957268 Protein-containing complex assembly (1) 4.0
330 GO:0071839 -0.961212 Apoptotic process in bone marrow cell (1) 0.0
902 GO:0046631 -0.968398 Alpha-beta t cell activation (1) 3.0
621 GO:0032410 -0.978890 Negative regulation of transporter activity (1) 1.0
17 GO:0000209 -0.995522 Protein polyubiquitination (1) 2.0
840 GO:0043154 -0.996503 Negative regulation of cysteine-type endopeptidase activity involved in apoptotic process (1) 1.0
35 GO:0061734 -0.998003 Parkin-mediated stimulation of mitophagy in response to mitochondrial depolarization (1) 0.0
387 GO:0051321 -1.010241 Meiotic cell cycle (1) 3.0
6 GO:0010971 -1.013845 Positive regulation of g2/m transition of mitotic cell cycle (1) 0.0
488 GO:0030878 -1.022587 Thyroid gland development (1) 0.0
598 GO:0043254 -1.024401 Regulation of protein-containing complex assembly (1) 3.0
161 GO:0030316 -1.031163 Osteoclast differentiation (1) 2.0
315 GO:0046902 -1.038293 Regulation of mitochondrial membrane permeability (1) 1.0
758 GO:0031099 -1.044014 Regeneration (1) 2.0
356 GO:0006996 -1.050513 Organelle organization (1) 5.0
547 GO:0032880 -1.061375 Regulation of protein localization (1) 4.0
470 GO:0042733 -1.062045 Embryonic digit morphogenesis (1) 0.0
590 GO:0050792 -1.063199 Regulation of viral process (1) 2.0
875 GO:0033077 -1.072052 T cell differentiation in thymus (1) 1.0
25 GO:0045930 -1.075304 Negative regulation of mitotic cell cycle (1) 2.0
293 GO:0032024 -1.085745 Positive regulation of insulin secretion (1) 1.0
305 GO:0034220 -1.105994 Ion transmembrane transport (1) 5.0
279 GO:0018205 -1.119880 Peptidyl-lysine modification (1) 4.0
300 GO:0032386 -1.120402 Regulation of intracellular transport (1) 2.0
19 GO:0031109 -1.121358 Microtubule polymerization or depolymerization (1) 2.0
302 GO:0006869 -1.132701 Lipid transport (1) 3.0
368 GO:0060271 -1.144707 Cilium assembly (1) 3.0
258 GO:0006694 -1.145934 Steroid biosynthetic process (1) 2.0
571 GO:2000010 -1.155468 Positive regulation of protein localization to cell surface (1) 0.0
880 GO:0031667 -1.156877 Response to nutrient levels (1) 4.0
220 GO:0016572 -1.176296 Histone phosphorylation (1) 1.0
805 GO:0090090 -1.179703 Negative regulation of canonical wnt signaling pathway (1) 0.0
856 GO:0019722 -1.180979 Calcium-mediated signaling (1) 2.0
481 GO:0030324 -1.181819 Lung development (1) 2.0
927 GO:0042632 -1.192404 Cholesterol homeostasis (1) 0.0
334 GO:2000811 -1.206493 Negative regulation of anoikis (1) 0.0
696 GO:0030336 -1.218248 Negative regulation of cell migration (1) 2.0
891 GO:0034405 -1.223979 Response to fluid shear stress (1) 1.0
61 GO:0001658 -1.225087 Branching involved in ureteric bud morphogenesis (1) 1.0
672 GO:0032091 -1.230462 Negative regulation of protein binding (1) 1.0
292 GO:0046883 -1.239220 Regulation of hormone secretion (1) 3.0
585 GO:0048589 -1.242926 Developmental growth (1) 4.0
634 GO:0061024 -1.243050 Membrane organization (1) 2.0
324 GO:0016241 -1.250511 Regulation of macroautophagy (1) 2.0
554 GO:0072659 -1.270281 Protein localization to plasma membrane (1) 2.0
476 GO:0048714 -1.271201 Positive regulation of oligodendrocyte differentiation (1) 0.0
603 GO:0061045 -1.272009 Negative regulation of wound healing (1) 2.0
917 GO:1900020 -1.277862 Positive regulation of protein kinase c activity (1) 0.0
407 GO:0030010 -1.292763 Establishment of cell polarity (1) 1.0
559 GO:0022414 -1.306268 Reproductive process (1) 4.0
199 GO:0006270 -1.310311 Dna replication initiation (1) 1.0
327 GO:0033028 -1.310645 Myeloid cell apoptotic process (1) 1.0
221 GO:0018105 -1.313496 Peptidyl-serine phosphorylation (1) 2.0
882 GO:0032147 -1.314908 Activation of protein kinase activity (1) 1.0
297 GO:0006839 -1.314960 Mitochondrial transport (1) 2.0
773 GO:0060612 -1.316095 Adipose tissue development (1) 1.0
815 GO:1901224 -1.319667 Positive regulation of nik/nf-kappab signaling (1) 0.0
551 GO:1903077 -1.319677 Negative regulation of protein localization to plasma membrane (1) 1.0
149 GO:0050853 -1.320551 B cell receptor signaling pathway (1) 1.0
807 GO:1901796 -1.330292 Regulation of signal transduction by p53 class mediator (1) 1.0
682 GO:0007026 -1.340212 Negative regulation of microtubule depolymerization (1) 0.0
515 GO:0021549 -1.341320 Cerebellum development (1) 2.0
798 GO:0051384 -1.346453 Response to glucocorticoid (1) 1.0
390 GO:0044770 -1.348006 Cell cycle phase transition (1) 4.0
198 GO:0006260 -1.359349 Dna replication (1) 3.0
614 GO:0035264 -1.365256 Multicellular organism growth (1) 1.0
5 GO:0000086 -1.366187 G2/m transition of mitotic cell cycle (1) 1.0
628 GO:0043467 -1.377923 Regulation of generation of precursor metabolites and energy (1) 2.0
616 GO:0001556 -1.391091 Oocyte maturation (1) 0.0
201 GO:0006310 -1.394633 Dna recombination (1) 3.0
243 GO:0006606 -1.399288 Protein import into nucleus (1) 1.0
104 GO:0006469 -1.403847 Negative regulation of protein kinase activity (1) 2.0
784 GO:0042771 -1.409468 Intrinsic apoptotic signaling pathway in response to dna damage by p53 class mediator (1) 1.0
204 GO:0006261 -1.419899 Dna-dependent dna replication (1) 2.0
2 GO:0000082 -1.425082 G1/s transition of mitotic cell cycle (1) 2.0
878 GO:0030593 -1.429383 Neutrophil chemotaxis (1) 1.0
839 GO:0031047 -1.438952 Gene silencing by rna (1) 2.0
15 GO:0070373 -1.439598 Negative regulation of erk1 and erk2 cascade (1) 0.0
367 GO:1901029 -1.442702 Negative regulation of mitochondrial outer membrane permeabilization involved in apoptotic signaling pathway (1) 0.0
385 GO:0060632 -1.444681 Regulation of microtubule-based movement (1) 1.0
285 GO:0051051 -1.447128 Negative regulation of transport (1) 3.0
1 GO:0045737 -1.452510 Positive regulation of cyclin-dependent protein serine/threonine kinase activity (1) 0.0
928 GO:0051453 -1.491236 Regulation of intracellular ph (1) 1.0
569 GO:0030307 -1.507144 Positive regulation of cell growth (1) 2.0
264 GO:0042177 -1.517738 Negative regulation of protein catabolic process (1) 1.0
442 GO:0008286 -1.524273 Insulin receptor signaling pathway (1) 1.0
524 GO:0007519 -1.531881 Skeletal muscle tissue development (1) 2.0
65 GO:0001701 -1.542908 In utero embryonic development (1) 2.0
174 GO:0010613 -1.546047 Positive regulation of cardiac muscle hypertrophy (1) 1.0
167 GO:0045637 -1.553460 Regulation of myeloid cell differentiation (1) 2.0
213 GO:0006396 -1.579762 Rna processing (1) 4.0
925 GO:1990403 -1.599465 Embryonic brain development (1) 0.0
62 GO:0001662 -1.634974 Behavioral fear response (1) 1.0
112 GO:0060789 -1.642170 Hair follicle placode formation (1) 0.0
321 GO:0010507 -1.662573 Negative regulation of autophagy (1) 1.0
193 GO:0006368 -1.663610 Transcription elongation from rna polymerase ii promoter (1) 1.0
910 GO:0051354 -1.665789 Negative regulation of oxidoreductase activity (1) 1.0
648 GO:0071549 -1.672280 Cellular response to dexamethasone stimulus (1) 0.0
240 GO:0006605 -1.701265 Protein targeting (1) 2.0
251 GO:0045833 -1.715199 Negative regulation of lipid metabolic process (1) 2.0
341 GO:2001236 -1.716145 Regulation of extrinsic apoptotic signaling pathway (1) 2.0
322 GO:0010508 -1.734826 Positive regulation of autophagy (1) 2.0
344 GO:1902236 -1.763757 Negative regulation of endoplasmic reticulum stress-induced intrinsic apoptotic signaling pathway (1) 0.0
828 GO:0010039 -1.764349 Response to iron ion (1) 1.0
577 GO:2001021 -1.770162 Negative regulation of response to dna damage stimulus (1) 1.0
342 GO:2001243 -1.793574 Negative regulation of intrinsic apoptotic signaling pathway (1) 2.0
830 GO:0071480 -1.811677 Cellular response to gamma radiation (1) 0.0
241 GO:0006612 -1.832631 Protein targeting to membrane (1) 1.0
550 GO:1904950 -1.845239 Negative regulation of establishment of protein localization (1) 2.0
586 GO:2000773 -1.847992 Negative regulation of cellular senescence (1) 0.0
33 GO:0000423 -1.852021 Mitophagy (1) 1.0
145 GO:0001782 -1.865421 B cell homeostasis (1) 0.0
527 GO:0007528 -1.888030 Neuromuscular junction development (1) 1.0
931 GO:0002931 -1.889223 Response to ischemia (1) 0.0
111 GO:0031069 -1.907723 Hair follicle morphogenesis (1) 0.0
575 GO:0030308 -1.913032 Negative regulation of cell growth (1) 1.0
393 GO:1901988 -1.952405 Negative regulation of cell cycle phase transition (1) 2.0
68 GO:0042659 -1.958074 Regulation of cell fate specification (1) 0.0
834 GO:0035195 -1.964770 Gene silencing by mirna (1) 1.0
271 GO:2000757 -1.982632 Negative regulation of peptidyl-lysine acetylation (1) 1.0
451 GO:0007263 -1.986508 Nitric oxide mediated signal transduction (1) 1.0
677 GO:0010977 -2.012737 Negative regulation of neuron projection development (1) 1.0
822 GO:0046898 -2.020251 Response to cycloheximide (1) 0.0
893 GO:0035094 -2.023491 Response to nicotine (1) 1.0
635 GO:0099173 -2.072765 Postsynapse organization (1) 2.0
884 GO:0032469 -2.081057 Endoplasmic reticulum calcium ion homeostasis (1) 1.0
538 GO:0048266 -2.139693 Behavioral response to pain (1) 0.0
787 GO:0042149 -2.188398 Cellular response to glucose starvation (1) 0.0
756 GO:0021695 -2.195335 Cerebellar cortex development (1) 1.0
662 GO:0031648 -2.195396 Protein destabilization (1) 0.0
727 GO:0070842 -2.214919 Aggresome assembly (1) 0.0
4 GO:2000134 -2.218448 Negative regulation of g1/s transition of mitotic cell cycle (1) 1.0
500 GO:0046666 -2.221412 Retinal cell programmed cell death (1) 0.0
151 GO:0006959 -2.228889 Humoral immune response (1) 2.0
216 GO:0045727 -2.232845 Positive regulation of translation (1) 1.0
274 GO:0034983 -2.241321 Peptidyl-lysine deacetylation (1) 0.0
565 GO:1902455 -2.310039 Negative regulation of stem cell population maintenance (1) 0.0
372 GO:0070584 -2.320318 Mitochondrion morphogenesis (1) 0.0
343 GO:1902166 -2.325709 Negative regulation of intrinsic apoptotic signaling pathway in response to dna damage by p53 class mediator (1) 0.0
512 GO:0008045 -2.344717 Motor neuron axon guidance (1) 1.0
769 GO:0010832 -2.404325 Negative regulation of myotube differentiation (1) 0.0
905 GO:0050864 -2.430784 Regulation of b cell activation (1) 2.0
338 GO:1900118 -2.496567 Negative regulation of execution phase of apoptosis (1) 0.0
426 GO:0090263 -2.498030 Positive regulation of canonical wnt signaling pathway (1) 0.0
540 GO:0007617 -2.520665 Mating behavior (1) 1.0
872 GO:0002326 -2.609504 B cell lineage commitment (1) 0.0
680 GO:0031640 -2.667289 Killing of cells of another organism (1) 1.0
278 GO:0016573 -2.686301 Histone acetylation (1) 3.0
156 GO:0048536 -2.714980 Spleen development (1) 0.0
308 GO:0051926 -2.798095 Negative regulation of calcium ion transport (1) 1.0
721 GO:1905710 -2.815392 Positive regulation of membrane permeability (1) 1.0
665 GO:0045636 -2.820949 Positive regulation of melanocyte differentiation (1) 0.0
719 GO:0043244 -2.878623 Regulation of protein-containing complex disassembly (1) 2.0
195 GO:0006360 -2.938382 Transcription by rna polymerase i (1) 2.0
22 GO:0007098 -3.141939 Centrosome cycle (1) 2.0
892 GO:0070059 -3.411097 Intrinsic apoptotic signaling pathway in response to endoplasmic reticulum stress (1) 1.0
189 GO:1903800 -3.468461 Positive regulation of production of mirnas involved in gene silencing by mirna (1) 0.0
844 GO:0043922 -3.856003 Negative regulation by host of viral transcription (1) 0.0
452 GO:0010750 -3.917943 Positive regulation of nitric oxide mediated signal transduction (1) 0.0
852 GO:0036289 -8.673968 Peptidyl-serine autophosphorylation (1) 0.0

Final model SVM

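Once the models have been cross-validated, we create the final models using all samples. Note that the metrics reported below for these final models are computed on the same samples used for training, so they describe the fit to the training data rather than generalization performance.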

Code
# Per-GO-term results of the final SVMs (fitted and scored on all samples)
GO_terms_auc_svm_final = {}
GO_terms_aupr_svm_final = {}
GO_terms_precision_svm_final = {}
models_svm = {}

# Fit one RBF-kernel SVM per GO term
for goterm in sparseGO_terms:
    # Labels: the "<goterm>_1" row of slim_matrix, flattened to a 1-D array
    goterm_drugs = slim_matrix.loc[[f"{goterm}_1"]].values.flatten()

    # Skip GO terms whose label sum is 8 or less
    # (i.e. too few annotated drugs, assuming 0/1 labels -- TODO confirm)
    if sum(goterm_drugs) <= 8:
        continue

    # Rows "<goterm>_1" ... "<goterm>_6" of the attribution table
    list_nodes = [f"{goterm}_{i}" for i in range(1, 7)]

    # Transpose so the six rows become feature columns, then scale every
    # column by its standard deviation; NaN results are replaced by 0
    score = attribution_data_annotated.loc[list_nodes].T
    score_mod = (score / score.std()).fillna(0)

    # The final model is trained and evaluated on the same, complete data
    X_train = X_test = score_mod
    y_train = y_test = goterm_drugs

    # Hyperparameters of the RBF kernel SVM
    gamma = "scale"
    C = 1

    svm_model = svm.SVC(
        kernel='rbf',
        C=C,
        gamma=gamma,
        class_weight="balanced",
        tol=0.001,
        probability=True,
        random_state=1234,
    )
    svm_model.fit(X_train, y_train)
    y_pred = svm_model.predict(X_test)

    # Platt-scaled probabilities; column 1 is the second entry of
    # svm_model.classes_
    y_pred_proba = svm_model.predict_proba(X_test)[:, 1]

    # ROC AUC, area under the precision-recall curve, and precision of the
    # hard predictions, each stored under the GO term id
    GO_terms_auc_svm_final[goterm] = metrics.roc_auc_score(y_test, y_pred_proba)

    precision, recall, thresholds = metrics.precision_recall_curve(
        y_test, y_pred_proba
    )
    GO_terms_aupr_svm_final[goterm] = metrics.auc(recall, precision)
    GO_terms_precision_svm_final[goterm] = metrics.precision_score(y_test, y_pred)

    # Keep the fitted estimator for later use
    models_svm[goterm] = svm_model
Code
# Number of entries in models_svm, i.e. how many GO terms got a fitted final SVM
len(models_svm)
939

Final model AUC

Code
# Turn the {GO term: AUC} dictionary into a one-column table indexed by GO
# term and drop the terms whose AUC is missing
GO_terms_auc_svm_df_final = (
    pd.DataFrame(
        list(GO_terms_auc_svm_final.items()),
        columns=['goterm', 'auc'],
    )
    .set_index("goterm")
    .dropna()
)

# Show the GO terms ranked from highest to lowest AUC (display only; the
# sorted copy is not stored)
GO_terms_auc_svm_df_final.sort_values(by=["auc"], ascending=False)
auc
goterm
GO:0036289 1.000000
GO:0060440 0.998540
GO:0043162 0.995455
GO:0070059 0.994760
GO:0071364 0.994109
GO:1901029 0.994048
GO:0072384 0.993636
GO:0051453 0.993393
GO:0001556 0.991972
GO:0090201 0.991808
GO:0010750 0.990909
GO:0016573 0.990783
GO:1903800 0.990573
GO:1904950 0.989945
GO:1902455 0.989091
GO:0042149 0.987697
GO:0034983 0.987273
GO:1990403 0.985909
GO:0071353 0.985587
GO:0006275 0.984226
GO:0010971 0.984091
GO:0006869 0.983409
GO:0001779 0.983182
GO:0051973 0.981651
GO:0060749 0.980895
GO:0042771 0.980633
GO:0072655 0.980455
GO:0061734 0.980455
GO:0045636 0.980178
GO:0045737 0.980084
GO:1902236 0.979762
GO:0060632 0.979545
GO:0016575 0.978731
GO:0042659 0.977727
GO:0046628 0.977376
GO:1902042 0.977273
GO:0098780 0.975909
GO:0046902 0.975849
GO:0051607 0.975552
GO:0006401 0.974678
GO:0017157 0.974040
GO:0032740 0.973856
GO:0006270 0.973848
GO:0046666 0.973570
GO:0008045 0.972603
GO:0006303 0.972553
GO:0042177 0.972431
GO:0060020 0.972290
GO:0006360 0.972095
GO:2001021 0.971520
GO:0042733 0.971364
GO:0016572 0.971342
GO:0070932 0.970909
GO:2001257 0.970909
GO:0001782 0.970384
GO:0006261 0.970112
GO:1905564 0.969834
GO:2000757 0.969545
GO:0051354 0.969091
GO:0072284 0.969069
GO:0051926 0.968891
GO:0043407 0.968585
GO:0034394 0.968096
GO:0050870 0.967621
GO:0046898 0.967143
GO:0031047 0.967115
GO:0016925 0.966364
GO:0035790 0.966361
GO:0006417 0.965261
GO:0032469 0.965008
GO:0035195 0.964816
GO:0021782 0.964091
GO:0070584 0.963810
GO:0051384 0.961083
GO:0002326 0.960811
GO:2000773 0.960310
GO:0050729 0.959779
GO:0046942 0.959480
GO:0035249 0.959091
GO:0045821 0.958904
GO:0099111 0.958880
GO:0071670 0.958851
GO:0006367 0.958333
GO:1905278 0.958270
GO:0010559 0.957929
GO:0006959 0.957854
GO:0018205 0.957782
GO:0035860 0.957768
GO:0031640 0.957381
GO:0007059 0.957268
GO:0070373 0.956762
GO:0030282 0.956762
GO:0001658 0.956522
GO:0030890 0.956075
GO:0035754 0.955757
GO:0010832 0.955455
GO:0099173 0.955238
GO:0021695 0.955238
GO:0045727 0.955026
GO:0002862 0.954696
GO:0014827 0.954432
GO:0016579 0.953923
GO:0002718 0.953854
GO:0071320 0.953746
GO:0051281 0.953182
GO:0042552 0.953182
GO:0000086 0.953095
GO:0032147 0.952991
GO:0032436 0.952499
GO:0010592 0.952273
GO:0006694 0.951735
GO:0033141 0.951735
GO:0071480 0.951429
GO:0006612 0.951118
GO:0048011 0.950729
GO:1903077 0.950714
GO:0033619 0.950455
GO:0006352 0.950306
GO:0001662 0.950221
GO:0010039 0.950040
GO:0090314 0.949147
GO:0034502 0.949074
GO:0014823 0.948954
GO:2001240 0.948220
GO:0007617 0.948182
GO:0032743 0.947281
GO:0006310 0.947141
GO:0006605 0.946678
GO:0006975 0.946204
GO:2000739 0.946101
GO:1902459 0.945909
GO:0007626 0.945701
GO:0023019 0.945116
GO:0003376 0.944700
GO:0006576 0.944346
GO:0038007 0.943690
GO:0050728 0.943637
GO:0032922 0.942661
GO:0045740 0.942465
GO:1900118 0.942381
GO:0010952 0.942143
GO:1905710 0.942143
GO:1902166 0.942128
GO:0008637 0.941950
GO:2000010 0.941865
GO:0055118 0.941679
GO:0000423 0.941364
GO:0043154 0.941156
GO:0048701 0.940775
GO:0008210 0.940749
GO:1900272 0.940171
GO:0060997 0.939809
GO:0007263 0.939545
GO:2000379 0.939167
GO:1900020 0.939091
GO:0050896 0.938915
GO:0016485 0.938636
GO:0043966 0.938376
GO:0002437 0.938295
GO:2000300 0.937318
GO:0140013 0.937095
GO:0034767 0.936758
GO:0031648 0.936624
GO:0007026 0.936364
GO:0032024 0.936149
GO:0030193 0.936040
GO:0010212 0.935098
GO:0006457 0.934641
GO:0032729 0.934420
GO:0030593 0.934413
GO:0010575 0.934272
GO:0008064 0.933643
GO:0008286 0.932331
GO:0001818 0.932128
GO:0030513 0.931404
GO:0060766 0.931364
GO:0006396 0.931346
GO:0006919 0.931342
GO:0038096 0.930886
GO:0001553 0.930810
GO:0045580 0.930407
GO:0046326 0.930406
GO:0035025 0.930294
GO:1903146 0.929091
GO:0060444 0.929091
GO:0006412 0.928571
GO:0048536 0.928290
GO:0002819 0.927685
GO:0048704 0.927370
GO:0051054 0.927333
GO:0090184 0.927099
GO:1900006 0.926941
GO:2000134 0.926917
GO:0046889 0.926822
GO:0043123 0.926512
GO:0070842 0.926364
GO:0046329 0.926364
GO:0006898 0.925891
GO:0006368 0.925841
GO:1905897 0.925743
GO:0030048 0.925591
GO:0042180 0.925076
GO:0035909 0.924883
GO:0051209 0.924065
GO:0030308 0.923951
GO:0043170 0.923707
GO:0035726 0.922783
GO:0031663 0.922727
GO:0000209 0.922119
GO:0009165 0.921544
GO:0002720 0.921427
GO:0006096 0.921292
GO:1902036 0.921254
GO:0071549 0.921066
GO:0007528 0.920950
GO:0090090 0.920930
GO:0042472 0.920455
GO:0031056 0.920429
GO:0050864 0.920262
GO:0060789 0.920000
GO:0007389 0.919762
GO:0048743 0.919572
GO:0030705 0.919116
GO:0060179 0.919091
GO:0045739 0.918823
GO:0043627 0.917977
GO:0040018 0.917659
GO:2001243 0.917078
GO:0090037 0.917056
GO:0040016 0.915987
GO:0043552 0.915951
GO:0001666 0.915013
GO:0010508 0.914755
GO:0033690 0.914545
GO:0098586 0.914419
GO:0043922 0.914091
GO:0035994 0.914021
GO:0031398 0.913694
GO:0042093 0.913524
GO:0032410 0.913182
GO:1901224 0.913182
GO:0006839 0.913167
GO:0045907 0.912844
GO:2000278 0.912619
GO:2001236 0.912563
GO:0048170 0.912474
GO:0071839 0.912217
GO:0031507 0.911552
GO:0060391 0.911011
GO:0032148 0.910451
GO:0070102 0.910000
GO:0030878 0.909762
GO:0035162 0.909463
GO:0051225 0.909314
GO:0002931 0.909064
GO:0007411 0.908683
GO:0008625 0.908500
GO:0035788 0.908313
GO:0010921 0.907360
GO:0048266 0.906977
GO:0010977 0.906667
GO:0050910 0.906656
GO:0045732 0.906062
GO:0046620 0.905714
GO:0035855 0.905551
GO:0030316 0.905551
GO:0006469 0.905340
GO:0090263 0.905136
GO:0021953 0.904874
GO:0060312 0.904790
GO:0006260 0.904703
GO:0030521 0.904434
GO:0008016 0.904091
GO:0010727 0.904091
GO:0030509 0.904035
GO:0007498 0.903914
GO:0050769 0.903592
GO:0050792 0.903414
GO:0009582 0.903167
GO:0007098 0.902745
GO:0002821 0.902464
GO:0071276 0.902162
GO:0007286 0.901132
GO:0045088 0.900952
GO:0055003 0.900943
GO:0035767 0.900748
GO:0045987 0.900474
GO:0061029 0.900474
GO:0033327 0.900465
GO:0000422 0.900374
GO:0010976 0.900117
GO:0008354 0.899895
GO:0070528 0.899726
GO:0006807 0.899601
GO:0045833 0.899128
GO:1905065 0.898923
GO:0007018 0.898915
GO:0007422 0.898647
GO:0048484 0.898636
GO:0032467 0.898182
GO:0050795 0.897909
GO:0030539 0.897909
GO:0048538 0.897833
GO:0032355 0.897646
GO:0007416 0.897554
GO:0021575 0.897509
GO:0060348 0.897410
GO:0001569 0.897282
GO:0060384 0.897171
GO:0031069 0.897099
GO:0050918 0.897059
GO:0035584 0.896905
GO:0051046 0.896369
GO:0043129 0.896233
GO:0001843 0.896024
GO:0046330 0.895444
GO:0007030 0.895429
GO:0048873 0.895092
GO:0000724 0.894922
GO:0007202 0.894511
GO:1903053 0.894419
GO:0003338 0.894238
GO:1901990 0.894150
GO:0060644 0.893917
GO:0043161 0.893782
GO:0030838 0.892727
GO:0001946 0.892571
GO:0072210 0.892039
GO:0030101 0.892003
GO:0050731 0.892003
GO:0010613 0.891865
GO:0030325 0.891865
GO:0048714 0.891783
GO:0048008 0.891667
GO:0001823 0.890989
GO:0016239 0.890496
GO:0030216 0.890460
GO:0071300 0.890341
GO:0032008 0.889952
GO:0061045 0.889881
GO:0051894 0.889619
GO:0030010 0.889612
GO:0031016 0.889533
GO:0001942 0.889526
GO:1902533 0.889155
GO:0016358 0.888660
GO:0001501 0.888280
GO:0051092 0.888251
GO:0016601 0.887883
GO:0097067 0.887324
GO:0009306 0.887019
GO:0048167 0.886555
GO:0050921 0.886315
GO:1990384 0.886268
GO:0046883 0.886202
GO:0007519 0.886154
GO:0043270 0.885881
GO:0003007 0.885720
GO:0071900 0.885420
GO:0007585 0.885391
GO:2001214 0.885258
GO:0071456 0.884685
GO:0016567 0.884594
GO:0060740 0.882856
GO:0035094 0.882732
GO:0072073 0.882732
GO:0060612 0.881602
GO:0060325 0.881498
GO:0045668 0.881347
GO:0042531 0.881332
GO:0010038 0.881167
GO:0071333 0.880972
GO:0006939 0.880907
GO:0090141 0.880907
GO:0046718 0.880697
GO:0051770 0.880461
GO:0033627 0.880455
GO:0048149 0.880352
GO:0002685 0.880291
GO:0043029 0.880195
GO:0038033 0.879699
GO:0055119 0.879336
GO:0003300 0.878843
GO:0005984 0.878788
GO:0002218 0.878773
GO:0072239 0.878669
GO:0031103 0.878667
GO:0048557 0.878638
GO:1901987 0.878627
GO:0060048 0.877703
GO:0045637 0.877659
GO:2001234 0.877406
GO:0038083 0.876762
GO:0071277 0.876323
GO:0048839 0.876278
GO:0000723 0.875714
GO:0060627 0.875648
GO:0035022 0.874811
GO:0007435 0.874669
GO:2001241 0.874309
GO:0002062 0.874091
GO:0035234 0.873792
GO:0034976 0.873754
GO:0007584 0.872411
GO:0002318 0.872408
GO:0001975 0.872354
GO:0071230 0.871837
GO:0034446 0.871788
GO:0070933 0.871364
GO:0030072 0.871331
GO:0071897 0.871171
GO:0035733 0.870478
GO:0032967 0.870403
GO:0048675 0.870071
GO:0060571 0.870035
GO:0050920 0.869917
GO:0050678 0.869106
GO:0034405 0.869048
GO:0051150 0.868932
GO:0001934 0.868720
GO:0010507 0.868700
GO:1904707 0.868636
GO:0050821 0.868325
GO:0006811 0.868262
GO:0070588 0.868155
GO:0014911 0.867596
GO:0090280 0.867440
GO:0008630 0.867386
GO:1901796 0.867386
GO:0051056 0.867368
GO:0051321 0.865996
GO:0051051 0.865833
GO:0051902 0.865573
GO:0097009 0.865089
GO:0060271 0.865061
GO:0045930 0.864995
GO:0035304 0.864977
GO:0051899 0.864866
GO:0033028 0.864808
GO:0018108 0.864767
GO:1900087 0.864434
GO:0010467 0.863952
GO:0035019 0.863557
GO:0006687 0.863557
GO:0001824 0.863532
GO:0033689 0.863522
GO:0071392 0.863443
GO:0035264 0.863252
GO:0046632 0.862800
GO:0034605 0.862619
GO:0032091 0.862599
GO:0072659 0.862358
GO:0051901 0.861670
GO:0006357 0.861504
GO:0042475 0.861448
GO:0045747 0.861374
GO:0072006 0.860598
GO:0042220 0.860483
GO:0006937 0.860353
GO:0006511 0.860111
GO:0010718 0.859229
GO:0035924 0.859169
GO:0090398 0.859050
GO:0031532 0.858981
GO:1904062 0.858745
GO:2000251 0.858605
GO:0014068 0.858156
GO:0048146 0.858102
GO:0051090 0.857756
GO:0034765 0.857317
GO:0007229 0.856812
GO:0007158 0.856712
GO:1901031 0.856712
GO:0061351 0.856372
GO:1904019 0.856183
GO:0048812 0.856107
GO:0060437 0.855565
GO:0034766 0.854758
GO:0033143 0.854574
GO:0007269 0.854497
GO:0032516 0.854484
GO:0036120 0.854433
GO:0090068 0.853947
GO:0046854 0.853881
GO:0010811 0.853842
GO:0060976 0.853774
GO:0060045 0.853680
GO:0021549 0.853311
GO:0043534 0.853142
GO:0038084 0.853135
GO:0046427 0.852947
GO:0030324 0.852866
GO:0048010 0.852488
GO:0097193 0.852297
GO:0048286 0.852143
GO:0006468 0.851852
GO:0060326 0.851772
GO:0034097 0.851678
GO:0016071 0.851667
GO:0036324 0.851085
GO:1903010 0.851085
GO:0002327 0.850962
GO:0001570 0.850955
GO:0043536 0.850601
GO:0043406 0.850494
GO:0045347 0.850455
GO:0001701 0.850196
GO:0019222 0.849913
GO:0051403 0.849741
GO:0097021 0.849170
GO:0043467 0.848706
GO:0045766 0.848621
GO:0060562 0.848060
GO:0030001 0.847486
GO:0006810 0.847446
GO:0031667 0.847070
GO:0048565 0.846000
GO:0019827 0.845649
GO:0007565 0.845356
GO:0009966 0.844893
GO:0055085 0.844768
GO:0043114 0.844749
GO:0002548 0.844626
GO:2000377 0.844341
GO:0030198 0.844187
GO:0032386 0.844167
GO:0031929 0.844150
GO:0035306 0.843956
GO:0006897 0.843955
GO:0051301 0.843815
GO:0001656 0.843809
GO:0042060 0.843773
GO:0031109 0.843563
GO:0000122 0.843521
GO:0043124 0.843017
GO:0001837 0.842638
GO:1902275 0.841719
GO:0051261 0.841719
GO:0051924 0.841520
GO:0002250 0.841465
GO:0030336 0.841059
GO:0046631 0.840909
GO:0016055 0.840841
GO:0033077 0.840735
GO:0048741 0.840370
GO:0007266 0.839667
GO:0001938 0.838948
GO:0043586 0.838898
GO:0008277 0.837920
GO:0043303 0.837858
GO:0070662 0.837526
GO:0060374 0.836916
GO:0045087 0.836889
GO:0034220 0.836107
GO:0032388 0.835532
GO:0048568 0.835305
GO:0050866 0.835227
GO:0009058 0.834946
GO:1902074 0.834912
GO:0043244 0.834906
GO:0008542 0.834749
GO:0045055 0.834433
GO:0045444 0.834286
GO:0046578 0.834019
GO:0046777 0.833773
GO:0001889 0.833595
GO:0008584 0.833556
GO:0045840 0.833536
GO:0002366 0.833530
GO:0007049 0.833424
GO:0046474 0.833392
GO:0019233 0.833182
GO:0000165 0.832917
GO:0051258 0.832656
GO:0032956 0.832450
GO:0022612 0.832326
GO:0051050 0.832281
GO:0043392 0.831905
GO:0031274 0.831814
GO:0051702 0.831506
GO:0010564 0.831039
GO:0031099 0.830615
GO:1905563 0.830607
GO:0030318 0.830136
GO:0048598 0.829861
GO:0007165 0.829719
GO:1901988 0.829474
GO:0007186 0.829429
GO:0033157 0.829023
GO:0019221 0.829000
GO:0000278 0.828800
GO:0042310 0.828784
GO:1901300 0.828616
GO:0006909 0.828497
GO:0030154 0.828332
GO:0002573 0.827001
GO:0045429 0.826889
GO:0051223 0.826823
GO:0016570 0.826822
GO:0030163 0.826442
GO:0009791 0.826355
GO:0090630 0.826069
GO:0032409 0.825426
GO:0048477 0.824868
GO:0034644 0.824849
GO:0007346 0.824841
GO:0046651 0.824539
GO:0051171 0.823977
GO:0000302 0.823816
GO:0048608 0.823637
GO:0032940 0.823481
GO:0008610 0.823469
GO:0010628 0.823151
GO:1903078 0.822244
GO:0016032 0.821730
GO:0009888 0.821458
GO:0016042 0.821320
GO:0007259 0.820971
GO:0008544 0.820813
GO:0000077 0.820719
GO:0021766 0.820586
GO:0001817 0.819733
GO:0001932 0.819683
GO:0002053 0.819493
GO:0072593 0.819390
GO:0009887 0.819242
GO:0006753 0.818971
GO:0071383 0.818684
GO:0007015 0.818627
GO:0001819 0.818452
GO:0007275 0.818394
GO:1903829 0.818083
GO:0002244 0.818060
GO:0051898 0.817795
GO:0009410 0.817265
GO:0030335 0.817025
GO:0061024 0.816492
GO:0007173 0.816349
GO:0050900 0.816242
GO:0060395 0.815909
GO:0009755 0.815667
GO:0045860 0.815613
GO:0050872 0.815367
GO:0007612 0.814548
GO:0000082 0.814519
GO:0050852 0.814267
GO:0043408 0.813977
GO:0002009 0.813874
GO:0019752 0.813530
GO:0001822 0.813506
GO:0007179 0.813500
GO:0051049 0.813439
GO:0010033 0.813421
GO:1901135 0.813379
GO:1900180 0.813213
GO:0033554 0.813172
GO:0007204 0.813136
GO:0044770 0.812960
GO:0001755 0.812831
GO:0001541 0.812614
GO:0006470 0.811795
GO:0009743 0.811594
GO:0033993 0.811585
GO:0035265 0.811041
GO:0051496 0.811040
GO:0007162 0.810927
GO:0030218 0.809955
GO:0006139 0.809816
GO:0070374 0.808642
GO:0006298 0.808612
GO:0009056 0.808581
GO:0070507 0.808431
GO:0071363 0.808295
GO:0050680 0.808234
GO:0007169 0.807939
GO:0001894 0.807870
GO:0000902 0.806862
GO:0009617 0.806711
GO:1902904 0.806512
GO:0030097 0.806125
GO:0007399 0.805949
GO:0050853 0.805230
GO:0051726 0.804914
GO:0008360 0.804780
GO:0050863 0.804772
GO:0010629 0.804702
GO:0032880 0.804305
GO:0021795 0.804198
GO:0046488 0.804184
GO:0031032 0.804004
GO:0045595 0.803077
GO:0006936 0.802344
GO:0045793 0.802149
GO:0071222 0.801980
GO:0051897 0.801416
GO:0006606 0.800953
GO:0006886 0.800872
GO:0030307 0.800490
GO:0048738 0.800331
GO:0010821 0.800220
GO:0051247 0.800154
GO:0042752 0.800120
GO:0032835 0.800025
GO:0033138 0.799982
GO:1903578 0.799701
GO:0050673 0.798946
GO:0006997 0.798672
GO:0060341 0.798662
GO:0006281 0.798556
GO:0042391 0.798475
GO:0050808 0.797394
GO:0007267 0.797360
GO:0050865 0.797107
GO:0018105 0.797070
GO:0060560 0.796569
GO:0071478 0.796131
GO:0018107 0.796045
GO:0019216 0.795977
GO:0023061 0.795969
GO:0036473 0.795897
GO:0051147 0.795455
GO:0006996 0.794900
GO:0030217 0.794761
GO:0070527 0.794579
GO:0050804 0.793936
GO:0060021 0.793808
GO:0045321 0.793792
GO:0046034 0.792891
GO:1904646 0.792812
GO:0030182 0.792624
GO:0002764 0.790893
GO:0007596 0.790844
GO:0043542 0.790474
GO:0006355 0.790400
GO:0010638 0.790227
GO:0042110 0.789916
GO:2000811 0.789519
GO:0045785 0.789271
GO:0001952 0.789204
GO:0048709 0.787833
GO:0016192 0.787802
GO:0002320 0.787705
GO:0045944 0.787650
GO:0035051 0.787216
GO:0070663 0.786907
GO:0046486 0.786765
GO:0006914 0.786701
GO:0071407 0.786480
GO:0048468 0.786471
GO:0043065 0.786229
GO:1902532 0.786009
GO:0033044 0.785934
GO:0031333 0.785379
GO:0071417 0.785307
GO:0016241 0.785238
GO:0007268 0.785105
GO:0007010 0.785047
GO:0002443 0.783904
GO:2000270 0.783308
GO:0001764 0.782709
GO:0051174 0.781935
GO:0034329 0.781439
GO:0043549 0.781269
GO:0010595 0.781136
GO:2001020 0.780899
GO:0050776 0.780250
GO:0007159 0.780220
GO:0048041 0.780105
GO:0016236 0.779569
GO:0048638 0.778556
GO:0042551 0.778521
GO:0007517 0.778474
GO:0032869 0.777921
GO:0051649 0.777222
GO:0009725 0.777056
GO:0030855 0.776398
GO:0002040 0.776347
GO:0071310 0.775759
GO:0042063 0.775499
GO:0009266 0.775262
GO:0048469 0.774721
GO:0042307 0.774054
GO:0032879 0.772742
GO:0002376 0.772696
GO:0055082 0.772549
GO:0016070 0.772150
GO:0060840 0.771853
GO:0010632 0.771656
GO:0007219 0.771429
GO:0051341 0.770833
GO:0060416 0.770267
GO:0090050 0.770256
GO:0002274 0.770035
GO:0009968 0.768538
GO:0009416 0.768293
GO:0009653 0.767978
GO:0030183 0.767941
GO:0007507 0.766819
GO:0007283 0.766625
GO:0048589 0.766590
GO:0050790 0.766284
GO:0065003 0.765562
GO:0030032 0.765559
GO:0048103 0.765258
GO:0006954 0.764565
GO:0048878 0.764329
GO:0007420 0.764092
GO:0030168 0.762921
GO:0006629 0.761422
GO:0006644 0.760398
GO:0001525 0.760172
GO:0120035 0.759979
GO:0034103 0.759958
GO:0014070 0.759563
GO:0044255 0.758471
GO:0051098 0.758377
GO:0051641 0.757853
GO:0034599 0.756607
GO:0043473 0.756079
GO:0036092 0.755500
GO:0048863 0.755435
GO:2000352 0.754950
GO:0030162 0.754327
GO:0042325 0.754119
GO:0008202 0.754059
GO:0033628 0.753988
GO:0051146 0.753713
GO:0010243 0.753077
GO:0043524 0.752485
GO:0003014 0.752381
GO:0002684 0.752271
GO:0001763 0.751863
GO:0051145 0.751530
GO:0045596 0.750733
GO:0000226 0.750411
GO:0031175 0.749603
GO:0007155 0.749178
GO:0002064 0.748667
GO:0045597 0.748593
GO:0040008 0.748313
GO:0060485 0.746706
GO:0006508 0.746456
GO:0097191 0.746084
GO:0016477 0.745481
GO:0005975 0.745307
GO:0043066 0.745136
GO:0050890 0.744664
GO:0007265 0.744598
GO:0032092 0.743751
GO:0051017 0.743352
GO:0007005 0.742652
GO:0043434 0.742583
GO:0003158 0.742221
GO:0042113 0.742005
GO:0120162 0.741508
GO:0051881 0.740169
GO:0030522 0.740000
GO:0007160 0.739980
GO:0048511 0.737766
GO:0044281 0.736585
GO:0007568 0.736500
GO:0007610 0.735604
GO:0035556 0.734890
GO:0048017 0.734281
GO:0006325 0.734050
GO:0006915 0.733434
GO:0008284 0.731624
GO:0045165 0.731183
GO:0002682 0.730932
GO:0022414 0.730731
GO:0097190 0.730349
GO:0051494 0.729426
GO:0051128 0.728822
GO:0043254 0.728497
GO:0098609 0.727096
GO:0006338 0.726229
GO:0007423 0.725167
GO:0001649 0.721908
GO:0048661 0.721041
GO:0010941 0.719507
GO:1900407 0.716777
GO:0007166 0.714859
GO:0002683 0.708778
GO:1902903 0.708773
GO:0008285 0.705969
GO:0030900 0.704312
GO:0034504 0.704188
GO:0033365 0.703782
GO:0070997 0.703482
GO:0033043 0.697664
GO:0051249 0.687925
GO:0008104 0.675076
GO:0033002 0.627125
GO:0042593 0.364293
GO:0071887 0.359703
GO:0044262 0.349744
GO:0051640 0.348052
GO:0051000 0.299934
GO:0050778 0.290903
GO:0007156 0.278928
GO:0008361 0.265902
GO:0070301 0.262988
GO:0022407 0.258553
GO:0015031 0.257560
GO:0043525 0.248848
GO:0051353 0.242570
GO:0043086 0.239824
GO:0045471 0.229314
GO:0051497 0.208992
GO:0031529 0.208648
GO:0099504 0.206822
GO:0043547 0.194217
GO:1904659 0.191457
GO:0031334 0.184335
GO:0046677 0.183479
GO:0015980 0.180476
GO:0060291 0.175234
GO:0009259 0.166290
GO:0060173 0.162212
GO:0042632 0.145299
GO:0046890 0.144186
GO:0032760 0.142722
GO:0051302 0.135000
GO:0031295 0.134696
GO:0019318 0.123006
GO:0010951 0.120040
GO:0021987 0.119137
GO:0006163 0.118024
GO:0030041 0.107955
GO:0001892 0.106324
GO:0030512 0.105991
GO:0060079 0.105991
GO:0050770 0.098547
GO:0051928 0.097553
GO:0031397 0.094042
GO:0060041 0.082956
GO:0051047 0.076258
GO:0019722 0.041730
GO:0090042 0.035699
Code
# Histogram of the "auc" values in GO_terms_auc_svm_df_final.
# The title reports the percentage of rows whose AUC is above the threshold.
sns.set(rc={'figure.figsize': (6, 4)})

auc_threshold = 0.7
n_terms_above = len(GO_terms_auc_svm_df_final[GO_terms_auc_svm_df_final["auc"] > auc_threshold])
perc = f"{round(100 * n_terms_above / len(GO_terms_auc_svm_df_final), 2)}%"

N, bins, patches = plt.hist(GO_terms_auc_svm_df_final, color=CB_color_cycle[6], bins=50, linewidth=0.1)

# Recolor every bar whose left edge lies above the threshold.
for left_edge, bar in zip(bins[:-1], patches):
    if left_edge > auc_threshold:
        bar.set_facecolor(CB_color_cycle[2])

# NOTE: the variant that works best is the plain sum of the attributions.
plt.xlabel("AUC (logistic 1)", fontsize=16)
plt.title(perc, fontsize=16)
Text(0.5, 1.0, '94.68%')

Final model AUPR

Code
# Turn the {GO term: AUPR} dict into a frame indexed by GO term, dropping terms without a value.
aupr_records = list(GO_terms_aupr_svm_final.items())
GO_terms_aupr_svm_df_final = (
    pd.DataFrame(aupr_records, columns=['goterm', 'aupr'])
    .set_index("goterm")
    .dropna()
)
# Preview the five terms with the highest AUPR.
GO_terms_aupr_svm_df_final.sort_values(by=["aupr"], ascending=False).head()
aupr
goterm
GO:0036289 1.000000
GO:0050896 0.995438
GO:0043170 0.989680
GO:0006807 0.987396
GO:0060440 0.978213
Code
# NOTE: there is a problem with the recall.
# Histogram of the "aupr" values in GO_terms_aupr_svm_df_final.
# The title reports the percentage of rows whose AUPR is above the threshold.
sns.set(rc={'figure.figsize': (5, 3)})

aupr_threshold = 0.7
n_terms_above = len(GO_terms_aupr_svm_df_final[GO_terms_aupr_svm_df_final["aupr"] > aupr_threshold])
perc = f"{round(100 * n_terms_above / len(GO_terms_aupr_svm_df_final), 2)}%"

N, bins, patches = plt.hist(GO_terms_aupr_svm_df_final, color=CB_color_cycle[6], bins=50, linewidth=0.1)

# Recolor every bar whose left edge lies above the threshold.
for left_edge, bar in zip(bins[:-1], patches):
    if left_edge > aupr_threshold:
        bar.set_facecolor(CB_color_cycle[3])

plt.xlabel("AUPR", fontsize=16)
plt.title(perc, fontsize=16)
Text(0.5, 1.0, '20.02%')

Predict for a new drug

Make predictions

Code
# Columns of attribution_data_all that are not columns of attribution_data_annotated
# (used below as the drugs to predict for).
# The order of attribution_data_all.columns is preserved: a plain set difference yields an
# arbitrary order that can change between sessions, which makes the prediction loop output
# and the combobox options non-reproducible.
annotated_drugs = set(attribution_data_annotated.columns)
unknown = [drug for drug in dict.fromkeys(attribution_data_all.columns) if drug not in annotated_drugs]

Get the probabilities for all unknown drugs

Code
# Predict, for every drug in `unknown`, the class and the probability given by each
# per-GO-term model in `models_svm`.
# Results:
#   probabilities_unknown - rows: GO terms, columns: drugs, values: predict_proba[:, 1]
#   preds_unknown         - rows: GO terms, columns: drugs, values: predict output
predictions = {}
distances = {}  # only needed by the commented-out decision_function variant below

# The feature list of each model and the per-feature std (computed over the drugs in
# attribution_data_annotated, i.e. the ones used for training) do not depend on the drug
# being scored, so compute them once instead of once per (drug, GO term) pair.
model_inputs = {}
for goterm, model in models_svm.items():
    list_nodes = list(model.feature_names_in_)  # the attributions this model expects
    node_std = attribution_data_annotated.loc[list_nodes].T.std()
    model_inputs[goterm] = (list_nodes, node_std)

# Collect one single-column frame per drug and concatenate once at the end;
# concatenating inside the loop re-copies the growing result on every iteration.
prob_columns = []
pred_columns = []

for drug in unknown:
    probabilities = {}
    predictions = {}  # reset per drug, like `probabilities`
    for goterm, model in models_svm.items():
        list_nodes, node_std = model_inputs[goterm]

        score = attribution_data_all.loc[list_nodes][drug].to_frame().T
        # Scale each feature by its training std; 0/0 results (NaN) become 0.
        score_mod = score.divide(node_std).fillna(0)

        predictions[goterm] = model.predict(score_mod)
        probabilities[goterm] = model.predict_proba(score_mod)[:, 1]  # platt values
        # distances[goterm] = model.decision_function(score_mod)

    drug_probs = pd.DataFrame.from_dict(probabilities).T
    drug_probs.columns = [drug]
    drug_preds = pd.DataFrame.from_dict(predictions).T
    drug_preds.columns = [drug]
    prob_columns.append(drug_probs)
    pred_columns.append(drug_preds)
    print(drug)

# pd.concat raises on an empty list, so fall back to empty frames when there is nothing to predict.
probabilities_unknown = pd.concat(prob_columns, axis=1) if prob_columns else pd.DataFrame()
preds_unknown = pd.concat(pred_columns, axis=1) if pred_columns else pd.DataFrame()
brd-k19103580-001-01-2
nvp-bhg712
wh-4-023
pd173074
cbpnzqvsjqdfbe-rerlvdevsa-n
n-(2,5-dimethoxyphenyl)sulfonyl-n-(4-methoxyphenyl)benzamide
brd-k33514849-001-01-9
chembl3182697
dfsdbfjuwanyes-ubwkhrtasa-n
nvp-adw742
sb 225002
schembl10436373
stf-62247
zm-447439
gsk269962a
schembl12469828
schembl2139153
azd7545
mira-1
wee1 inhibitor
achp
chembl2203525
brd-k49290616-001-01-9
nvp-231
pha-665752
jq1
n-[(2r,3s)-2-[[cyclopropylmethyl(methyl)amino]methyl]-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2h-1,5-benzoxazocin-8-yl]-1-methyl-4-imidazolesulfonamide
otkwubxkthwzke-fuopvmcbsa-n
bms-345541;cc1=cc2=c(c=c1)n=c(c3=nc=c(n23)c)nccn.cl
cid5951923
ml-030
cct036477
pluripotin
nutlin-3a
skepinone-l
gsk-j4
(-)-rapamycin
hhdwuyjenprcsp-uttpphfysa-n
r406 (free base)
n-[[(4r,5r)-2-[(2r)-1-hydroxypropan-2-yl]-4-methyl-8-(4-methylpent-1-ynyl)-1,1-dioxo-4,5-dihydro-3h-6,1$l^{6},2-benzoxathiazocin-5-yl]methyl]-n-methyl-2-pyrazinecarboxamide
plx-4720
chebi:119735
pdk1 inhibitor
pf-573228
jq1 + mk-0752
cay10594
fti-277
gw843682x
sz4ta2
bix 02189
chembl2180739
chm-1
s-trityl-l-cysteine
qs11
hg6-64-1
lomeguatrib
rad51 inhibitor b02
bms-509744
bms614
brd-k09587429-001-01-3
gsk429286a
bx-912
fawugygebhaqbu-ppexnqrjsa-n
schembl2066172
schembl618594
ethyl 5,5,7,7-tetramethyl-2-(5-nitrothiophene-2-carboxamido)-4,5,6,7-tetrahydrothieno[2,3-c]pyridine-3-carboxylate
ar-42
fttyfnwrwdlflp-uhfffaoysa-n
n'-(2-pyrrolylidenemethyl)-2-(2,4,6-trichlorophenoxy)acetohydrazide
methylstat
nsc87877
opahmanwvumwaw-ghfzsmqjsa-n
wp-1130
dacarbazine
schembl6874948
mk-0752
brd-k35716340-001-01-2
brd-a63646118-001-02-6
brd-k62801835-001-01-0
shikonin
nsc 23766
selisistat
brd-k50799972-001-01-3
homoharringtonine
unc0321
sr8278
erastin
schembl12180851
schembl1710881
brd-k17060750-001-01-0
dbeq
jq1 + schembl2671349
mnulegdcpyonbu-pamdcedjsa-n
schembl13833463
gdc-0879
mdivi-1
chembl515416
brd-k53792571-003-01-6
n-[(2r,3r)-5-[(2s)-1-hydroxypropan-2-yl]-3-methyl-2-[[methyl-[(1-naphthalenylamino)-oxomethyl]amino]methyl]-6-oxo-3,4-dihydro-2h-1,5-benzoxazocin-10-yl]-4-pyridinecarboxamide
n-[[(2s,3s)-8-[2-(1-hydroxycyclopentyl)ethynyl]-5-[(2s)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2h-pyrido[2,3-b][1,5]oxazocin-2-yl]methyl]-n-methyl-4-oxanecarboxamide
gsk4112
mkwlqyduwjbeku-lwsjdiafsa-n
chembl3183639
khs101
ch 55
narciclasine
betulinic acid
sb-590885
brd-k20514654-001-01-8
dacinostat
unc0638
nsc207895
tw-37
11-cis retinoic acid
bibr 1532
dichloroplatinum diammoniate
5-azacytidine
gqrreykspjmlaw-ygnumjmvsa-n
chembl2058177
c6 ceramide
camptothecin
schembl12474870
agk2
ski ii
chembl24850
icg-001
n-[(2s,3s)-2-[(dimethylamino)methyl]-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2h-1,5-benzoxazocin-10-yl]-2,5-dimethyl-3-pyrazolecarboxamide
vx-11e
cct 018159
wfygwjxipugujf-uhfffaoysa-n
mls000571394
aacocf3
naphtho(2,1-b)furan, 1-methyl-2-nitro-
n-[(2s,3s)-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-2-[[methyl-[(1-naphthalenylamino)-oxomethyl]amino]methyl]-6-oxo-2,3,4,7-tetrahydro-1,5-benzoxazonin-9-yl]-4-pyridinecarboxamide
brd-k25737009-001-01-2
jnk inhibitor viii
spautin-1
ic-87114
sr1001
16beta-bromoandrosterone
schembl2586580
nvp-tae684
cgp-60474
jw 480
fhtvasvneuemiv-lwsjdiafsa-n
ethyl 4-[4-[(5-nitrofuran-2-yl)methylidene]-3,5-dioxopyrazolidin-1-yl]benzoate
dmog
schembl15422095
chembl2132053
brd-k02251932-001-01-3
bms-536924;cc1=cc(=cc2=c1n/c(=c\3/c(=cc=nc3=o)nc[c@h](c4=cc(=cc=c4)cl)o)/n2)n5ccocc5
nsc60043
ng25
cid-2858522
schembl13737661
ikk-3 inhibitor
brd7880
smer-3
cx-5461
am580
elesclomol
chembl2381520
hc-067047
schembl17821363
niclosamide
n-[(2r,3s)-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-2-(methylaminomethyl)-6-oxo-2,3,4,7-tetrahydro-1,5-benzoxazonin-9-yl]methanesulfonamide
thapsigargin
brd1172
schembl1914213
i-bet-762
brd-k58306044-001-01-3
a-770041
chembl585951
glutaminase c-in-1
ql47
chembl2143553
cytochalasin b
n1-[2-(1h-indol-3-yl)ethyl]-n3-pyridin-4-ylbenzene-1,3-diamine
curcumin, curcuma longa l.
pifithrin
schembl16479156
chlorambucil
schembl15444220
zinc113660258
yk-4-279
pci-34051
fh535
schembl916391
n-(2-(4-(2-oxo-2,3-dihydro-1h-benzo[d]imidazol-1-yl)piperidin-1-yl)ethyl)-2-naphthamide
brd-k30019337-001-01-1
nvp-bsk805
chembl2356172
unc-0638 + schembl2671349
ki8751
az3146
isx-9
schembl13741284
ku-0063794
sb-431542
tpca-1
pdipalloxofubu-uhfffaoysa-n
(-)-epigallocatechin gallate
bleomycin
rsk inhibitor fmk
n9-isopropyl-olomoucine
mgcd-265
ml031
mg-132
n-[3-(1h-benzimidazol-2-yl)-5-(1-piperazinylmethyl)phenyl]-2-quinoxalinecarboxamide
le 135
phloretin
schembl2085358
abt-737
jw74
pf 750
chembl436817
cp-466722
myricetin
kcbbhekxehmwfw-yqzfvpmhsa-n
gw-405833
brd-6929
brd-k52037352-001-01-6
stemregenin 1
iu-1
apicidin
spox1_002925
brd6708
darinaparsin
wpttvjltnawyao-cdypjpissa-n
mi-2
ak174031
chebi:94975
nsc373989
snx-2112
ac-55649
gsk-650394
cyclopamine
o6-benzylguanine
pd153035
ipa-3
chir-99021
n-[[(4s,5r)-8-[2-(2-fluorophenyl)ethynyl]-2-[(2s)-1-hydroxypropan-2-yl]-4-methyl-1,1-dioxo-4,5-dihydro-3h-6,1$l^{6},2-benzoxathiazocin-5-yl]methyl]-n-methyl-2-pyridin-4-ylacetamide
su11274
chebi:94110
brd-k49456190-001-01-0
cct007093
schembl18216694
n-[(2r,3s)-5-[(2s)-1-hydroxypropan-2-yl]-3-methyl-2-(methylaminomethyl)-6-oxo-3,4-dihydro-2h-1,5-benzoxazocin-8-yl]cyclohexanecarboxamide
schembl18188080
cdk9 inhibitor
(s)-selisistat
parthenolide;c/c/1=c\cc[c@@]2([c@h](o2)[c@@h]3[c@@h](cc1)c(=c)c(=o)o3)c
marinopyrrole a
brd-a34462049-001-01-0
etp-46464
lsm-6185
bx-795
nu-7441
n-[(2s,3s,6r)-2-(hydroxymethyl)-6-[2-(4-methyl-1-piperazinyl)-2-oxoethyl]-3-oxanyl]-1,3-benzodioxole-5-carboxamide
xhqlywyicdktpj-uhfffaoysa-n
cay10603
nelarabine
parbendazole
brd-k33199242-001-01-2
mln2480
lsm-13729
tubastatin a
kpt-185
bai1
ci 976
importazole
brd-k04800985-001-01-1
gw 441756
schembl16273428
mitomycin c
i-bet151
as-605240
pd318088
cil56
brd-k02492147-001-01-4
azd1152-hqpa
brd-k27986637-001-01-3
ethyl 2-cyano-3-(3,4-dichlorophenyl)acryloylcarbamate
tcmdc-125552
bms-345541;cc1=cc2=c(c=c1)n=c(c3=nc=c(n23)c)nccn
bay 61-3606 + hydrochloric acid
bryostatin 1
bam7
palmostatin b
serdemetan
jq1 + unc0638
n-[2-methyl-5-[oxo-[3-(1-oxoprop-2-enylamino)-5-(trifluoromethyl)anilino]methyl]phenyl]-5-isoxazolecarboxamide
oprea1_718426
tipifarnib (s enantiomer)
salermide
sch-529074
pf-543
sb-525334
ro-3306
pf-4708671
gsk461364
pf 184
lrlwxbhfpgsuox-hhkxydnmsa-n
bendamustine
chembl416418
pik-93
chembl568305
chembl3188232
ku-55933
6-bio
ku-60019
schembl16296919
ak174031 + mk-1775
ml311
schembl18426910
vaf347
bms-536924;cc1=cc(=cc2=c1nc(=c3c(=cc=nc3=o)nc[c@h](c4=cc(=cc=c4)cl)o)n2)n5ccocc5
sl-0101
mps1-in-1 + hydrochloric acid
nsc 95397
lfm-a13
schembl16046542
telomerase inhibitor ix
mls000106215
procarbazine
gnf-2
fqi1
brd-a28105619-001-01-3
ly2183240
embelin
mtlmdzjugdutcp-ywefrbeisa-n
cot inhibitor-2
isoliquiritigenin
n-[3-[[2-[[4-(dimethylamino)cyclohexyl]amino]-9-propan-2-yl-6-purinyl]amino]phenyl]-2-propenamide
bms270394
nsc74859
austocystin d
schembl15428380
ku-0060648
dasa-58
n-[(2r,3s)-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-2-(methylaminomethyl)-6-oxo-2,3,4,7-tetrahydro-1,5-benzoxazonin-9-yl]-3-(4-morpholinyl)propanamide
brd-a59431241-001-01-1
whi-p97
brd-a15100685-001-01-8
schembl4463213
osi-027;coc1=cc=cc2=c/c(=c/3\c4=c(n=cnn4c(=n3)c5ccc(cc5)c(=o)o)n)/n=c21
aica ribonucleotide
n-[(2s,3s)-2-[[[(cyclohexylamino)-oxomethyl]-methylamino]methyl]-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2h-1,5-benzoxazocin-8-yl]-2-(1-methyl-3-indolyl)acetamide
chs-828
ifosfamide
sepantronium + bromide
bleomycin sulfate
gsk1904529a
ouabain
n-[(2r,3s)-2-[[(4-chlorophenyl)sulfonyl-methylamino]methyl]-5-[(2s)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-2,3,4,7-tetrahydro-1,5-benzoxazonin-9-yl]-4,4,4-trifluorobutanamide
1009820-21-6
azanide; dichloroplatinum(2+)
brd3308
leptomycin b
as601245
erk5-in-1
phenformin
rg108
at7867
chebi:93385
tcmdc-123515
stf-31
chembl2062550
n-[2-methyl-5-[2-oxo-9-(1h-pyrazol-4-yl)-1-benzo[h][1,6]naphthyridinyl]phenyl]-2-propenamide
brd-k16147474-001-01-1
ubrvgbldxdoetm-uhfffaoysa-n
qfjcirlumzquot-laoshscvsa-n
ml-210
necrosulfonamide
a-804598
schembl15422028
schembl14934014
cdk4/6 inhibitor iv
srlvtmsbrcmody-qxpfvdmisa-n
j3.559.058g
tgx-221
gw2580
schembl6465274
nsc136476
c646
mls001198989
hms1361j12
ciclopirox
schembl10183194
salubrinal
ko 143
z-llnle-cho
lsm-6189
temozolomide
chembl258148
n-(4-methoxyphenyl)sulfonyl-n-[2-[2-(1-oxido-4-pyridin-1-iumyl)ethenyl]phenyl]acetamide
necrostatin-1
n'-[(6-oxo-5-prop-2-enyl-1-cyclohexa-2,4-dienylidene)methyl]-2-[4-(phenylmethyl)-1-piperazinyl]acetohydrazide;c=ccc1=cc=cc(=cnnc(=o)cn2ccn(cc2)cc3=cc=cc=c3)c1=o
nsc48300
lrrk2-in-1
t0901317
n-cyclopropyl-3-[3-[[cyclopropyl(oxo)methyl]amino]-1h-indazol-6-yl]benzamide
jw-55
pf-4800567
az-628
fr-180204
wz4002
schembl12182311
brd4770
brd-k41597374-001-01-7
eht-1864
n'-[(6-oxo-5-prop-2-enyl-1-cyclohexa-2,4-dienylidene)methyl]-2-[4-(phenylmethyl)-1-piperazinyl]acetohydrazide;c=ccc1=cc=c/c(=c/nnc(=o)cn2ccn(cc2)cc3=cc=cc=c3)/c1=o
wz8040
ahpn
chembl2152368
epz004777 + schembl2671349
nan + nan
schembl4320913
lsm-36779
brd-k29086754-001-01-7
a-443654
sepantronium
isoevodiamine
retinol + schembl2671349
sch-529074 + jnj-26854165
agwauacrbaqpjj-uhfffaoysa-n
chembl520231
schembl11942935
chembl3185999
n-methyl-n-[4-[[6-[[1-(1-oxoprop-2-enyl)-3-piperidinyl]amino]-7h-purin-2-yl]amino]phenyl]propanamide
parthenolide;c/c/1=c/cc[c@@]2([c@h](o2)[c@@h]3[c@@h](cc1)c(=c)c(=o)o3)c
ym-201636
schembl2671349
smr000198998
rigosertib
dqnfqthsdkxsee-qfzqxzrasa-n
cbb1007
oqhlpaawwgdxaw-uhfffaoysa-n
daporinad
cyclophosphamide
mi-1
schembl12041987
chembl2206358
brd-1240
isonicotinohydroxamic acid
rsl3
schembl13833318
1,2-cyclohexanediamine anion + oxalic acid
sb-216763
brd-k05870596-001-01-4
dnmdp-2
n-[6-(2-amino-4-fluoroanilino)-6-oxohexyl]-4-methylbenzamide
chembl2398212
akt inhibitor viii

Study drug with unknown MOA

Choose drug with unknown MOA…

Code
# Combobox offering the drugs in `unknown`; `interactive` passes the chosen value to `f` as `drug`.
drug_picker_u = widgets.Combobox(options=unknown)
combobox_u = interactive(f, drug=drug_picker_u)
Code
# One label per GO term in platt_matrix's index, with the "_1" suffix appended.
predictions_nodes = [goterm + "_1" for goterm in platt_matrix.index]
Code
# Add names to the GO terms: keep the rows of real_go_info whose GO_term is one of the
# "<GO id>_1" labels in predictions_nodes.
# .copy() makes the result an independent frame, so the column assignment below does not
# write into a slice of real_go_info (avoids pandas' SettingWithCopyWarning).
real_go_info_svm = real_go_info[real_go_info.GO_term.isin(predictions_nodes)].copy()
# Strip only the trailing "_1" so GO_term holds the bare GO id.
real_go_info_svm["GO_term"] = real_go_info_svm["GO_term"].str.replace(r"_1$", "", regex=True)
Code
# Render the drug-selection widget in the cell output.
display(combobox_u)
Code
# Read the widget's `result` attribute — presumably the drug name returned by `f`
# for the current selection; confirm against the definition of `f`.
selected_drug_u_name = combobox_u.result
Code
# Predictions of the selected drug as a two-column frame (GO term, prediction).
selected_preds = preds_unknown.loc[:, selected_drug_u_name]
predictions_df = pd.DataFrame(selected_preds).reset_index()
predictions_df.columns = ["GO_term", "predictions"]
Code
# Probabilities of the selected drug, joined on GO_term with the GO-term info
# (real_go_info_svm) and the predictions (predictions_df).
selected_probs = probabilities_unknown.loc[:, selected_drug_u_name]
probabilities_df = pd.DataFrame(selected_probs).reset_index()
probabilities_df.columns = ["GO_term", "probability"]
probabilities_df = (
    probabilities_df
    .merge(real_go_info_svm, on="GO_term")
    .merge(predictions_df, on="GO_term")
)

# Show the 200 most probable terms among those with layer_number <= 3.
in_first_layers = probabilities_df["layer_number"] <= 3
probabilities_df.loc[in_first_layers].sort_values(by=["probability"], ascending=False).head(200)
GO_term probability Name layer_number predictions
820 GO:0033993 0.809556 Response to lipid (1) 3.0 1.0
223 GO:0018108 0.783375 Peptidyl-tyrosine phosphorylation (1) 3.0 1.0
573 GO:0010629 0.742615 Negative regulation of gene expression (1) 3.0 1.0
106 GO:0071900 0.725062 Regulation of protein serine/threonine kinase activity (1) 2.0 1.0
624 GO:0010628 0.702924 Positive regulation of gene expression (1) 3.0 1.0
74 GO:0001817 0.687600 Regulation of cytokine production (1) 3.0 1.0
44 GO:0048812 0.672129 Neuron projection morphogenesis (1) 3.0 1.0
224 GO:0046777 0.661031 Protein autophosphorylation (1) 1.0 1.0
99 GO:0001934 0.658374 Positive regulation of protein phosphorylation (1) 3.0 1.0
570 GO:0045597 0.628072 Positive regulation of cell differentiation (1) 3.0 1.0
839 GO:0031047 0.553888 Gene silencing by rna (1) 2.0 1.0
100 GO:0033138 0.542770 Positive regulation of peptidyl-serine phosphorylation (1) 1.0 1.0
888 GO:0034976 0.540483 Response to endoplasmic reticulum stress (1) 3.0 1.0
633 GO:0051301 0.535293 Cell division (1) 2.0 1.0
821 GO:0034097 0.533600 Response to cytokine (1) 3.0 1.0
423 GO:1902533 0.530199 Positive regulation of intracellular signal transduction (1) 2.0 1.0
596 GO:0060341 0.523390 Regulation of cellular localization (1) 3.0 0.0
729 GO:0120035 0.514954 Regulation of plasma membrane bounded cell projection organization (1) 3.0 1.0
568 GO:0008284 0.513916 Positive regulation of cell population proliferation (1) 2.0 1.0
558 GO:0016032 0.500000 Viral process (1) 3.0 1.0
641 GO:0071417 0.494543 Cellular response to organonitrogen compound (1) 3.0 1.0
353 GO:0006954 0.482633 Inflammatory response (1) 3.0 1.0
9 GO:0043408 0.476159 Regulation of mapk cascade (1) 2.0 1.0
808 GO:1902532 0.449154 Negative regulation of intracellular signal transduction (1) 3.0 1.0
11 GO:0043406 0.445845 Positive regulation of map kinase activity (1) 1.0 1.0
8 GO:0000165 0.443783 Mapk cascade (1) 3.0 0.0
358 GO:0007005 0.427023 Mitochondrion organization (1) 3.0 1.0
134 GO:0002366 0.423235 Leukocyte activation involved in immune response (1) 3.0 1.0
221 GO:0018105 0.422261 Peptidyl-serine phosphorylation (1) 2.0 0.0
285 GO:0051051 0.421500 Negative regulation of transport (1) 3.0 1.0
191 GO:0045944 0.420524 Positive regulation of transcription by rna polymerase ii (1) 2.0 1.0
654 GO:0090398 0.416328 Cellular senescence (1) 1.0 1.0
847 GO:0045055 0.414665 Regulated exocytosis (1) 2.0 1.0
342 GO:2001243 0.411574 Negative regulation of intrinsic apoptotic signaling pathway (1) 2.0 1.0
510 GO:0042063 0.408279 Gliogenesis (1) 3.0 1.0
788 GO:0009410 0.407937 Response to xenobiotic stimulus (1) 2.0 1.0
824 GO:0071363 0.407544 Cellular response to growth factor stimulus (1) 3.0 1.0
496 GO:0048608 0.406699 Reproductive structure development (1) 2.0 1.0
528 GO:0007565 0.406022 Female pregnancy (1) 2.0 1.0
896 GO:0097193 0.400210 Intrinsic apoptotic signaling pathway (1) 3.0 1.0
76 GO:0001819 0.388417 Positive regulation of cytokine production (1) 2.0 1.0
48 GO:0001525 0.386728 Angiogenesis (1) 2.0 1.0
300 GO:0032386 0.384834 Regulation of intracellular transport (1) 2.0 1.0
906 GO:0043549 0.378711 Regulation of kinase activity (1) 3.0 0.0
662 GO:0031648 0.377411 Protein destabilization (1) 0.0 1.0
516 GO:0007423 0.372363 Sensory organ development (1) 3.0 1.0
461 GO:0050804 0.367884 Modulation of chemical synaptic transmission (1) 3.0 1.0
104 GO:0006469 0.364467 Negative regulation of protein kinase activity (1) 2.0 1.0
620 GO:0051098 0.363823 Regulation of binding (1) 3.0 1.0
86 GO:0072006 0.358531 Nephron development (1) 2.0 1.0
473 GO:0008584 0.350294 Male gonad development (1) 1.0 1.0
536 GO:0007610 0.346612 Behavior (1) 3.0 0.0
693 GO:1904646 0.344997 Cellular response to amyloid-beta (1) 0.0 1.0
376 GO:0007015 0.344878 Actin filament organization (1) 3.0 1.0
552 GO:0033365 0.343027 Protein localization to organelle (1) 3.0 1.0
774 GO:0030216 0.342203 Keratinocyte differentiation (1) 2.0 1.0
639 GO:0060326 0.341919 Cell chemotaxis (1) 2.0 1.0
154 GO:0050778 0.335649 Positive regulation of immune response (1) 3.0 0.0
480 GO:0048565 0.333289 Digestive tract development (1) 1.0 1.0
794 GO:0043434 0.332497 Response to peptide hormone (1) 3.0 1.0
651 GO:0050808 0.322913 Synapse organization (1) 3.0 1.0
772 GO:0060485 0.320964 Mesenchyme development (1) 3.0 1.0
406 GO:0048041 0.319689 Focal adhesion assembly (1) 1.0 1.0
698 GO:0070663 0.318299 Regulation of leukocyte proliferation (1) 2.0 1.0
804 GO:0030855 0.317904 Epithelial cell differentiation (1) 3.0 0.0
538 GO:0048266 0.308838 Behavioral response to pain (1) 0.0 1.0
225 GO:0006470 0.305848 Protein dephosphorylation (1) 3.0 1.0
719 GO:0043244 0.305739 Regulation of protein-containing complex disassembly (1) 2.0 1.0
725 GO:0070997 0.305130 Neuron death (1) 2.0 0.0
532 GO:0007596 0.304871 Blood coagulation (1) 3.0 1.0
806 GO:0051056 0.302828 Regulation of small gtpase mediated signal transduction (1) 3.0 1.0
433 GO:0097191 0.302625 Extrinsic apoptotic signaling pathway (1) 3.0 1.0
574 GO:0008285 0.300631 Negative regulation of cell population proliferation (1) 3.0 0.0
49 GO:0001569 0.300115 Branching involved in blood vessel morphogenesis (1) 0.0 1.0
926 GO:0060020 0.297104 Bergmann glial cell differentiation (1) 0.0 1.0
789 GO:0009416 0.295539 Response to light stimulus (1) 2.0 0.0
887 GO:0034504 0.294393 Protein localization to nucleus (1) 2.0 1.0
169 GO:0002764 0.292841 Immune response-regulating signaling pathway (1) 3.0 0.0
890 GO:0071353 0.290912 Cellular response to interleukin-4 (1) 1.0 1.0
505 GO:0007283 0.290337 Spermatogenesis (1) 2.0 1.0
513 GO:0030900 0.289030 Forebrain development (1) 3.0 1.0
241 GO:0006612 0.288796 Protein targeting to membrane (1) 1.0 1.0
644 GO:0071230 0.284292 Cellular response to amino acid stimulus (1) 1.0 1.0
712 GO:0033002 0.280632 Muscle cell proliferation (1) 2.0 1.0
317 GO:0006897 0.277768 Endocytosis (1) 3.0 1.0
230 GO:0030162 0.277751 Regulation of proteolysis (1) 3.0 0.0
607 GO:0042391 0.276380 Regulation of membrane potential (1) 3.0 0.0
689 GO:1905897 0.275961 Regulation of response to endoplasmic reticulum stress (1) 2.0 1.0
105 GO:0045860 0.275729 Positive regulation of protein kinase activity (1) 2.0 0.0
198 GO:0006260 0.275261 Dna replication (1) 3.0 1.0
548 GO:1903829 0.274893 Positive regulation of protein localization (1) 3.0 0.0
934 GO:0051258 0.274092 Protein polymerization (1) 3.0 1.0
378 GO:0031532 0.271109 Actin cytoskeleton reorganization (1) 1.0 1.0
216 GO:0045727 0.270882 Positive regulation of translation (1) 1.0 1.0
642 GO:0034599 0.270088 Cellular response to oxidative stress (1) 3.0 0.0
767 GO:0051146 0.266722 Striated muscle cell differentiation (1) 2.0 0.0
77 GO:0002718 0.262195 Regulation of cytokine production involved in immune response (1) 2.0 1.0
19 GO:0031109 0.261030 Microtubule polymerization or depolymerization (1) 2.0 1.0
584 GO:0040008 0.259597 Regulation of growth (1) 3.0 0.0
937 GO:0051640 0.257672 Organelle localization (1) 3.0 0.0
377 GO:0031032 0.257340 Actomyosin structure organization (1) 2.0 1.0
904 GO:0042113 0.254618 B cell activation (1) 3.0 0.0
133 GO:0043303 0.253768 Mast cell degranulation (1) 1.0 1.0
561 GO:0048511 0.251652 Rhythmic process (1) 3.0 1.0
243 GO:0006606 0.250144 Protein import into nucleus (1) 1.0 1.0
836 GO:1901987 0.249107 Regulation of cell cycle phase transition (1) 3.0 0.0
758 GO:0031099 0.247922 Regeneration (1) 2.0 1.0
739 GO:1902903 0.245529 Regulation of supramolecular fiber organization (1) 3.0 1.0
323 GO:0016236 0.245469 Macroautophagy (1) 3.0 1.0
478 GO:0048568 0.244894 Embryonic organ development (1) 3.0 0.0
103 GO:0042531 0.243583 Positive regulation of tyrosine phosphorylation of stat protein (1) 0.0 1.0
598 GO:0043254 0.240098 Regulation of protein-containing complex assembly (1) 3.0 1.0
864 GO:0030183 0.236405 B cell differentiation (1) 1.0 0.0
301 GO:0032388 0.235842 Positive regulation of intracellular transport (1) 1.0 0.0
692 GO:0010595 0.235403 Positive regulation of endothelial cell migration (1) 2.0 1.0
865 GO:0030217 0.234830 T cell differentiation (1) 3.0 0.0
20 GO:0070507 0.234661 Regulation of microtubule cytoskeleton organization (1) 2.0 1.0
111 GO:0031069 0.234249 Hair follicle morphogenesis (1) 0.0 1.0
363 GO:0051494 0.232679 Negative regulation of cytoskeleton organization (1) 2.0 1.0
226 GO:0035304 0.232643 Regulation of protein dephosphorylation (1) 2.0 1.0
913 GO:0090630 0.231968 Activation of gtpase activity (1) 0.0 1.0
733 GO:0030335 0.231192 Positive regulation of cell migration (1) 3.0 0.0
196 GO:0006357 0.230696 Regulation of transcription by rna polymerase ii (1) 3.0 1.0
524 GO:0007519 0.230417 Skeletal muscle tissue development (1) 2.0 1.0
53 GO:0001570 0.230416 Vasculogenesis (1) 1.0 1.0
26 GO:1901990 0.229708 Regulation of mitotic cell cycle phase transition (1) 2.0 0.0
33 GO:0000423 0.229518 Mitophagy (1) 1.0 1.0
615 GO:0035265 0.227576 Organ growth (1) 2.0 0.0
84 GO:0001822 0.227091 Kidney development (1) 3.0 0.0
151 GO:0006959 0.225795 Humoral immune response (1) 2.0 1.0
244 GO:0042307 0.225164 Positive regulation of protein import into nucleus (1) 0.0 1.0
24 GO:0007346 0.224934 Regulation of mitotic cell cycle (1) 3.0 0.0
162 GO:0060374 0.223666 Mast cell differentiation (1) 0.0 1.0
533 GO:0030168 0.219923 Platelet activation (1) 2.0 1.0
152 GO:0045087 0.218376 Innate immune response (1) 3.0 0.0
523 GO:0007517 0.217930 Muscle organ development (1) 3.0 0.0
138 GO:0002683 0.217495 Negative regulation of immune system process (1) 3.0 0.0
319 GO:0006909 0.217477 Phagocytosis (1) 2.0 1.0
18 GO:0000226 0.216530 Microtubule cytoskeleton organization (1) 3.0 0.0
622 GO:0043086 0.212387 Negative regulation of catalytic activity (1) 3.0 1.0
898 GO:0035924 0.211788 Cellular response to vascular endothelial growth factor stimulus (1) 2.0 1.0
187 GO:0071897 0.211315 Dna biosynthetic process (1) 2.0 0.0
517 GO:0043586 0.209986 Tongue development (1) 1.0 1.0
606 GO:0048638 0.209981 Regulation of developmental growth (1) 2.0 0.0
736 GO:0009617 0.209127 Response to bacterium (1) 3.0 0.0
691 GO:0043542 0.207450 Endothelial cell migration (1) 3.0 0.0
321 GO:0010507 0.205494 Negative regulation of autophagy (1) 1.0 1.0
149 GO:0050853 0.205417 B cell receptor signaling pathway (1) 1.0 0.0
885 GO:1900180 0.205300 Regulation of protein localization to nucleus (1) 1.0 1.0
907 GO:0051881 0.202707 Regulation of mitochondrial membrane potential (1) 1.0 0.0
171 GO:0003014 0.201637 Renal system process (1) 2.0 1.0
211 GO:0031507 0.200984 Heterochromatin assembly (1) 1.0 1.0
64 GO:0071456 0.200564 Cellular response to hypoxia (1) 1.0 0.0
694 GO:0032869 0.197755 Cellular response to insulin stimulus (1) 2.0 0.0
779 GO:0008544 0.196206 Epidermis development (1) 3.0 0.0
634 GO:0061024 0.193065 Membrane organization (1) 2.0 0.0
50 GO:0002040 0.192737 Sprouting angiogenesis (1) 1.0 0.0
610 GO:0031333 0.191284 Negative regulation of protein-containing complex assembly (1) 2.0 1.0
657 GO:0045165 0.191259 Cell fate commitment (1) 3.0 0.0
435 GO:0016055 0.190638 Wnt signaling pathway (1) 2.0 0.0
569 GO:0030307 0.190101 Positive regulation of cell growth (1) 2.0 1.0
200 GO:0006281 0.189187 Dna repair (1) 2.0 0.0
63 GO:0001666 0.188575 Response to hypoxia (1) 2.0 1.0
560 GO:0043473 0.188137 Pigmentation (1) 2.0 1.0
521 GO:0035051 0.187551 Cardiocyte differentiation (1) 2.0 1.0
690 GO:2001020 0.187450 Regulation of response to dna damage stimulus (1) 2.0 0.0
346 GO:0006936 0.187431 Muscle contraction (1) 3.0 1.0
328 GO:0043065 0.184805 Positive regulation of apoptotic process (1) 2.0 0.0
650 GO:0034329 0.183554 Cell junction assembly (1) 2.0 0.0
262 GO:0045429 0.183059 Positive regulation of nitric oxide biosynthetic process (1) 0.0 1.0
117 GO:0060562 0.183024 Epithelial tube morphogenesis (1) 2.0 0.0
583 GO:0032967 0.182852 Positive regulation of collagen biosynthetic process (1) 0.0 1.0
208 GO:0006325 0.182003 Chromatin organization (1) 3.0 0.0
209 GO:0006338 0.181855 Chromatin remodeling (1) 2.0 1.0
882 GO:0032147 0.181843 Activation of protein kinase activity (1) 1.0 0.0
7 GO:0000122 0.181449 Negative regulation of transcription by rna polymerase ii (1) 1.0 0.0
911 GO:0060416 0.180831 Response to growth hormone (1) 1.0 1.0
121 GO:0090050 0.180206 Positive regulation of cell migration involved in sprouting angiogenesis (1) 0.0 1.0
467 GO:0009791 0.179613 Post-embryonic development (1) 1.0 0.0
870 GO:0070527 0.179383 Platelet aggregation (1) 1.0 1.0
781 GO:0008625 0.179216 Extrinsic apoptotic signaling pathway via death domain receptors (1) 1.0 1.0
702 GO:0048146 0.179210 Positive regulation of fibroblast proliferation (1) 0.0 1.0
785 GO:0009266 0.178944 Response to temperature stimulus (1) 2.0 1.0
316 GO:0033157 0.178583 Regulation of intracellular protein transport (1) 1.0 0.0
472 GO:0001553 0.178541 Luteinization (1) 0.0 1.0
174 GO:0010613 0.176998 Positive regulation of cardiac muscle hypertrophy (1) 1.0 1.0
842 GO:0071407 0.176595 Cellular response to organic cyclic compound (1) 3.0 0.0
834 GO:0035195 0.176389 Gene silencing by mirna (1) 1.0 0.0
504 GO:0048709 0.176248 Oligodendrocyte differentiation (1) 2.0 1.0
54 GO:2001214 0.175971 Positive regulation of vasculogenesis (1) 0.0 1.0
600 GO:0010632 0.174413 Regulation of epithelial cell migration (1) 3.0 0.0
682 GO:0007026 0.174040 Negative regulation of microtubule depolymerization (1) 0.0 1.0
276 GO:0016567 0.173194 Protein ubiquitination (1) 3.0 0.0
881 GO:0031929 0.171840 Tor signaling (1) 2.0 1.0
52 GO:0001541 0.171713 Ovarian follicle development (1) 1.0 0.0
310 GO:0051924 0.171261 Regulation of calcium ion transport (1) 3.0 0.0
899 GO:0035994 0.170901 Response to muscle stretch (1) 1.0 1.0
32 GO:0000422 0.170612 Autophagy of mitochondrion (1) 2.0 1.0
703 GO:0048661 0.169791 Positive regulation of smooth muscle cell proliferation (1) 1.0 0.0
447 GO:0007173 0.169218 Epidermal growth factor receptor signaling pathway (1) 2.0 0.0
Code
# Number of GO terms whose binary prediction is 1.
int((probabilities_df["predictions"] == 1).sum())
288
Code
# Number of GO terms whose binary prediction is 0.
int((probabilities_df["predictions"] == 0).sum())
651

A probability below 0.5 does not necessarily mean that the GO term does not belong to the class: for example, a probability of 0.2 can still correspond to a prediction of 1 (annotated to the MoA).

Modify probabilities

Take into account the number of annotations each GO term has (general GO terms are easier to predict because they have more annotations).

For a drug with unknown MoA…

Code
# A-priori probability of each row of the annotation matrix: the row sum
# divided by the number of columns.
# NOTE(review): presumably rows are GO terms and columns are drugs -- confirm
# against the cell that builds slim_matrix_single_neuron.
n_columns = slim_matrix_single_neuron.shape[1]
sum_annotations = slim_matrix_single_neuron.sum(axis=1) / n_columns

# Log-odds of the prior ...
prior_odds = sum_annotations / (1 - sum_annotations)
logits_apriori = np.log(prior_odds)

# ... and log-odds of the probability predicted by the model.
predicted_probability = probabilities_df["probability"]
logits_apost = np.log(predicted_probability / (1 - predicted_probability))

# The subtraction is positional (plain NumPy arrays), so the rows of
# probabilities_df and slim_matrix_single_neuron must be in the same order.
delta_logits = logits_apost.to_numpy() - logits_apriori.to_numpy()
delta_logits_df = pd.DataFrame({"delta_logits": delta_logits})

# delta_logits_df carries a fresh 0..n-1 index; the join below matches it
# against the index labels of probabilities_df.
probabilities_mod = probabilities_df.merge(
    delta_logits_df,
    left_index=True,
    right_index=True,
)
Code
# Terms predicted as 1 and located at layer 7 or below, ranked from the largest
# to the smallest delta logit.
probabilities_mod.loc[
    (probabilities_mod["predictions"] == 1)
    & (probabilities_mod["layer_number"] <= 7)
].sort_values(by=["delta_logits"], ascending=False)
GO_term probability Name layer_number predictions delta_logits
839 GO:0031047 0.553888 Gene silencing by rna (1) 2.0 1.0 2.192458
662 GO:0031648 0.377411 Protein destabilization (1) 0.0 1.0 2.092834
106 GO:0071900 0.725062 Regulation of protein serine/threonine kinase activity (1) 2.0 1.0 1.944569
33 GO:0000423 0.229518 Mitophagy (1) 1.0 1.0 1.880007
538 GO:0048266 0.308838 Behavioral response to pain (1) 0.0 1.0 1.857031
913 GO:0090630 0.231968 Activation of gtpase activity (1) 0.0 1.0 1.793943
216 GO:0045727 0.270882 Positive regulation of translation (1) 1.0 1.0 1.746070
223 GO:0018108 0.783375 Peptidyl-tyrosine phosphorylation (1) 3.0 1.0 1.672826
719 GO:0043244 0.305739 Regulation of protein-containing complex disassembly (1) 2.0 1.0 1.646098
241 GO:0006612 0.288796 Protein targeting to membrane (1) 1.0 1.0 1.626841
104 GO:0006469 0.364467 Negative regulation of protein kinase activity (1) 2.0 1.0 1.593794
888 GO:0034976 0.540483 Response to endoplasmic reticulum stress (1) 3.0 1.0 1.575980
890 GO:0071353 0.290912 Cellular response to interleukin-4 (1) 1.0 1.0 1.575256
638 GO:0033554 0.884331 Cellular response to stress (1) 4.0 1.0 1.573975
820 GO:0033993 0.809556 Response to lipid (1) 3.0 1.0 1.551573
74 GO:0001817 0.687600 Regulation of cytokine production (1) 3.0 1.0 1.534715
682 GO:0007026 0.174040 Negative regulation of microtubule depolymerization (1) 0.0 1.0 1.533781
528 GO:0007565 0.406022 Female pregnancy (1) 2.0 1.0 1.478881
224 GO:0046777 0.661031 Protein autophosphorylation (1) 1.0 1.0 1.453821
544 GO:0060179 0.161090 Male mating behavior (1) 0.0 1.0 1.440901
32 GO:0000422 0.170612 Autophagy of mitochondrion (1) 2.0 1.0 1.409881
774 GO:0030216 0.342203 Keratinocyte differentiation (1) 2.0 1.0 1.406530
44 GO:0048812 0.672129 Neuron projection morphogenesis (1) 3.0 1.0 1.385004
472 GO:0001553 0.178541 Luteinization (1) 0.0 1.0 1.373326
77 GO:0002718 0.262195 Regulation of cytokine production involved in immune response (1) 2.0 1.0 1.372827
900 GO:0042060 0.612497 Wound healing (1) 4.0 1.0 1.325911
385 GO:0060632 0.143694 Regulation of microtubule-based movement (1) 1.0 1.0 1.306104
726 GO:0065003 0.654123 Protein-containing complex assembly (1) 4.0 1.0 1.304384
926 GO:0060020 0.297104 Bergmann glial cell differentiation (1) 0.0 1.0 1.288696
582 GO:1902459 0.140358 Positive regulation of stem cell population maintenance (1) 0.0 1.0 1.278722
63 GO:0001666 0.188575 Response to hypoxia (1) 2.0 1.0 1.276928
342 GO:2001243 0.411574 Negative regulation of intrinsic apoptotic signaling pathway (1) 2.0 1.0 1.262448
573 GO:0010629 0.742615 Negative regulation of gene expression (1) 3.0 1.0 1.216446
480 GO:0048565 0.333289 Digestive tract development (1) 1.0 1.0 1.203775
174 GO:0010613 0.176998 Positive regulation of cardiac muscle hypertrophy (1) 1.0 1.0 1.199403
676 GO:0030282 0.116755 Bone mineralization (1) 1.0 1.0 1.177409
49 GO:0001569 0.300115 Branching involved in blood vessel morphogenesis (1) 0.0 1.0 1.170620
899 GO:0035994 0.170901 Response to muscle stretch (1) 1.0 1.0 1.156962
443 GO:0035860 0.114067 Glial cell-derived neurotrophic factor receptor signaling pathway (1) 0.0 1.0 1.151083
847 GO:0045055 0.414665 Regulated exocytosis (1) 2.0 1.0 1.096844
134 GO:0002366 0.423235 Leukocyte activation involved in immune response (1) 3.0 1.0 1.076788
181 GO:0006139 0.875555 Nucleobase-containing compound metabolic process (1) 6.0 1.0 1.061941
689 GO:1905897 0.275961 Regulation of response to endoplasmic reticulum stress (1) 2.0 1.0 1.052784
38 GO:0000902 0.813929 Cell morphogenesis (1) 4.0 1.0 1.052132
654 GO:0090398 0.416328 Cellular senescence (1) 1.0 1.0 1.048427
693 GO:1904646 0.344997 Cellular response to amyloid-beta (1) 0.0 1.0 1.043235
100 GO:0033138 0.542770 Positive regulation of peptidyl-serine phosphorylation (1) 1.0 1.0 1.018799
745 GO:0009653 0.877370 Anatomical structure morphogenesis (1) 5.0 1.0 1.014649
198 GO:0006260 0.275261 Dna replication (1) 3.0 1.0 1.007970
133 GO:0043303 0.253768 Mast cell degranulation (1) 1.0 1.0 0.981406
368 GO:0060271 0.126831 Cilium assembly (1) 3.0 1.0 0.970313
285 GO:0051051 0.421500 Negative regulation of transport (1) 3.0 1.0 0.964316
626 GO:0051649 0.802013 Establishment of localization in cell (1) 4.0 1.0 0.957089
378 GO:0031532 0.271109 Actin cytoskeleton reorganization (1) 1.0 1.0 0.947004
651 GO:0050808 0.322913 Synapse organization (1) 3.0 1.0 0.943923
11 GO:0043406 0.445845 Positive regulation of map kinase activity (1) 1.0 1.0 0.939978
34 GO:1903146 0.103490 Regulation of autophagy of mitochondrion (1) 1.0 1.0 0.932003
262 GO:0045429 0.183059 Positive regulation of nitric oxide biosynthetic process (1) 0.0 1.0 0.911660
661 GO:0046326 0.165116 Positive regulation of glucose import (1) 0.0 1.0 0.907432
911 GO:0060416 0.180831 Response to growth hormone (1) 1.0 1.0 0.896690
571 GO:2000010 0.127151 Positive regulation of protein localization to cell surface (1) 0.0 1.0 0.888565
633 GO:0051301 0.535293 Cell division (1) 2.0 1.0 0.887200
483 GO:0035909 0.162013 Aorta morphogenesis (1) 1.0 1.0 0.884752
54 GO:2001214 0.175971 Positive regulation of vasculogenesis (1) 0.0 1.0 0.863534
416 GO:0035556 0.887824 Intracellular signal transduction (1) 4.0 1.0 0.862843
470 GO:0042733 0.097137 Embryonic digit morphogenesis (1) 0.0 1.0 0.861598
886 GO:0034502 0.132453 Protein localization to chromosome (1) 2.0 1.0 0.856780
111 GO:0031069 0.234249 Hair follicle morphogenesis (1) 0.0 1.0 0.832897
880 GO:0031667 0.374380 Response to nutrient levels (1) 4.0 1.0 0.819334
833 GO:0010467 0.880677 Gene expression (1) 5.0 1.0 0.817357
184 GO:0006275 0.102018 Regulation of dna replication (1) 2.0 1.0 0.816174
162 GO:0060374 0.223666 Mast cell differentiation (1) 0.0 1.0 0.815593
265 GO:0051247 0.818839 Positive regulation of protein metabolic process (1) 4.0 1.0 0.802282
821 GO:0034097 0.533600 Response to cytokine (1) 3.0 1.0 0.801775
408 GO:0007165 0.930074 Signal transduction (1) 6.0 1.0 0.801131
849 GO:0043966 0.142338 Histone h3 acetylation (1) 2.0 1.0 0.797379
112 GO:0060789 0.091132 Hair follicle placode formation (1) 0.0 1.0 0.791156
558 GO:0016032 0.500000 Viral process (1) 3.0 1.0 0.785929
151 GO:0006959 0.225795 Humoral immune response (1) 2.0 1.0 0.785159
86 GO:0072006 0.358531 Nephron development (1) 2.0 1.0 0.777591
83 GO:0002720 0.163684 Positive regulation of cytokine production involved in immune response (1) 1.0 1.0 0.776348
53 GO:0001570 0.230416 Vasculogenesis (1) 1.0 1.0 0.770097
532 GO:0007596 0.304871 Blood coagulation (1) 3.0 1.0 0.764501
567 GO:0051641 0.841801 Cellular localization (1) 5.0 1.0 0.761481
639 GO:0060326 0.341919 Cell chemotaxis (1) 2.0 1.0 0.758938
19 GO:0031109 0.261030 Microtubule polymerization or depolymerization (1) 2.0 1.0 0.746075
800 GO:0030521 0.102431 Androgen receptor signaling pathway (1) 1.0 1.0 0.729092
511 GO:0030182 0.721830 Neuron differentiation (1) 5.0 1.0 0.726501
806 GO:0051056 0.302828 Regulation of small gtpase mediated signal transduction (1) 3.0 1.0 0.724275
527 GO:0007528 0.109807 Neuromuscular junction development (1) 1.0 1.0 0.722229
794 GO:0043434 0.332497 Response to peptide hormone (1) 3.0 1.0 0.716778
99 GO:0001934 0.658374 Positive regulation of protein phosphorylation (1) 3.0 1.0 0.708241
896 GO:0097193 0.400210 Intrinsic apoptotic signaling pathway (1) 3.0 1.0 0.705651
624 GO:0010628 0.702924 Positive regulation of gene expression (1) 3.0 1.0 0.704419
722 GO:0042325 0.780828 Regulation of phosphorylation (1) 5.0 1.0 0.698909
752 GO:0043170 0.942545 Macromolecule metabolic process (1) 7.0 1.0 0.693446
517 GO:0043586 0.209986 Tongue development (1) 1.0 1.0 0.692360
182 GO:0016070 0.778891 Rna metabolic process (1) 5.0 1.0 0.687621
281 GO:0006811 0.500000 Ion transport (1) 6.0 1.0 0.686632
586 GO:2000773 0.121640 Negative regulation of cellular senescence (1) 0.0 1.0 0.685597
808 GO:1902532 0.449154 Negative regulation of intracellular signal transduction (1) 3.0 1.0 0.684968
570 GO:0045597 0.628072 Positive regulation of cell differentiation (1) 3.0 1.0 0.680798
76 GO:0001819 0.388417 Positive regulation of cytokine production (1) 2.0 1.0 0.679733
934 GO:0051258 0.274092 Protein polymerization (1) 3.0 1.0 0.677814
713 GO:0035726 0.096936 Common myeloid progenitor cell proliferation (1) 0.0 1.0 0.667842
386 GO:0007049 0.727375 Cell cycle (1) 6.0 1.0 0.665704
473 GO:0008584 0.350294 Male gonad development (1) 1.0 1.0 0.663186
813 GO:0051898 0.103239 Negative regulation of protein kinase b signaling (1) 0.0 1.0 0.653209
698 GO:0070663 0.318299 Regulation of leukocyte proliferation (1) 2.0 1.0 0.652091
729 GO:0120035 0.514954 Regulation of plasma membrane bounded cell projection organization (1) 3.0 1.0 0.631426
881 GO:0031929 0.171840 Tor signaling (1) 2.0 1.0 0.624585
496 GO:0048608 0.406699 Reproductive structure development (1) 2.0 1.0 0.619202
559 GO:0022414 0.641754 Reproductive process (1) 4.0 1.0 0.617772
562 GO:0050896 0.963328 Response to stimulus (1) 7.0 1.0 0.605804
381 GO:0008064 0.113103 Regulation of actin polymerization or depolymerization (1) 2.0 1.0 0.603158
855 GO:1903578 0.141298 Regulation of atp metabolic process (1) 1.0 1.0 0.602865
619 GO:0050790 0.825081 Regulation of catalytic activity (1) 4.0 1.0 0.598054
476 GO:0048714 0.112165 Positive regulation of oligodendrocyte differentiation (1) 0.0 1.0 0.593772
363 GO:0051494 0.232679 Negative regulation of cytoskeleton organization (1) 2.0 1.0 0.593448
652 GO:0042180 0.090402 Cellular ketone metabolic process (1) 3.0 1.0 0.590857
423 GO:1902533 0.530199 Positive regulation of intracellular signal transduction (1) 2.0 1.0 0.581066
319 GO:0006909 0.217477 Phagocytosis (1) 2.0 1.0 0.578885
353 GO:0006954 0.482633 Inflammatory response (1) 3.0 1.0 0.578336
406 GO:0048041 0.319689 Focal adhesion assembly (1) 1.0 1.0 0.577606
585 GO:0048589 0.599054 Developmental growth (1) 4.0 1.0 0.575879
461 GO:0050804 0.367884 Modulation of chemical synaptic transmission (1) 3.0 1.0 0.568933
711 GO:0019752 0.316625 Carboxylic acid metabolic process (1) 4.0 1.0 0.563481
629 GO:0051174 0.783163 Regulation of phosphorus metabolic process (1) 6.0 1.0 0.558257
82 GO:0032743 0.079738 Positive regulation of interleukin-2 production (1) 0.0 1.0 0.545266
510 GO:0042063 0.408279 Gliogenesis (1) 3.0 1.0 0.539129
730 GO:0031175 0.631148 Neuron projection development (1) 4.0 1.0 0.537147
741 GO:0016477 0.682158 Cell migration (1) 4.0 1.0 0.536651
663 GO:0050821 0.165950 Protein stabilization (1) 0.0 1.0 0.535214
343 GO:1902166 0.145254 Negative regulation of intrinsic apoptotic signaling pathway in response to dna damage by p53 class mediator (1) 0.0 1.0 0.525494
213 GO:0006396 0.268052 Rna processing (1) 4.0 1.0 0.523646
563 GO:1900272 0.063979 Negative regulation of long-term synaptic potentiation (1) 0.0 1.0 0.517858
36 GO:0000723 0.137533 Telomere maintenance (1) 1.0 1.0 0.515446
524 GO:0007519 0.230417 Skeletal muscle tissue development (1) 2.0 1.0 0.511692
93 GO:0001843 0.082584 Neural tube closure (1) 1.0 1.0 0.491845
226 GO:0035304 0.232643 Regulation of protein dephosphorylation (1) 2.0 1.0 0.490892
684 GO:0051770 0.158964 Positive regulation of nitric-oxide synthase biosynthetic process (1) 0.0 1.0 0.483868
583 GO:0032967 0.182852 Positive regulation of collagen biosynthetic process (1) 0.0 1.0 0.478922
641 GO:0071417 0.494543 Cellular response to organonitrogen compound (1) 3.0 1.0 0.475108
788 GO:0009410 0.407937 Response to xenobiotic stimulus (1) 2.0 1.0 0.474799
412 GO:0009966 0.816584 Regulation of signal transduction (1) 5.0 1.0 0.474348
735 GO:0046718 0.151222 Viral entry into host cell (1) 1.0 1.0 0.472175
266 GO:0030163 0.372430 Protein catabolic process (1) 4.0 1.0 0.453053
196 GO:0006357 0.230696 Regulation of transcription by rna polymerase ii (1) 3.0 1.0 0.447389
840 GO:0043154 0.084113 Negative regulation of cysteine-type endopeptidase activity involved in apoptotic process (1) 1.0 1.0 0.427212
299 GO:0030705 0.133167 Cytoskeleton-dependent intracellular transport (1) 3.0 1.0 0.424567
433 GO:0097191 0.302625 Extrinsic apoptotic signaling pathway (1) 3.0 1.0 0.420731
610 GO:0031333 0.191284 Negative regulation of protein-containing complex assembly (1) 2.0 1.0 0.417629
501 GO:0007507 0.480118 Heart development (1) 4.0 1.0 0.417365
901 GO:0042110 0.432222 T cell activation (1) 4.0 1.0 0.413843
260 GO:0006807 0.931522 Nitrogen compound metabolic process (1) 7.0 1.0 0.413085
785 GO:0009266 0.178944 Response to temperature stimulus (1) 2.0 1.0 0.412491
358 GO:0007005 0.427023 Mitochondrion organization (1) 3.0 1.0 0.412213
377 GO:0031032 0.257340 Actomyosin structure organization (1) 2.0 1.0 0.410068
484 GO:0007399 0.731066 Nervous system development (1) 6.0 1.0 0.409544
136 GO:0002376 0.760395 Immune system process (1) 6.0 1.0 0.409057
211 GO:0031507 0.200984 Heterochromatin assembly (1) 1.0 1.0 0.406540
20 GO:0070507 0.234661 Regulation of microtubule cytoskeleton organization (1) 2.0 1.0 0.406533
356 GO:0006996 0.787638 Organelle organization (1) 5.0 1.0 0.400534
376 GO:0007015 0.344878 Actin filament organization (1) 3.0 1.0 0.399824
173 GO:0003300 0.147997 Cardiac muscle hypertrophy (1) 2.0 1.0 0.399424
620 GO:0051098 0.363823 Regulation of binding (1) 3.0 1.0 0.394294
922 GO:0036324 0.117627 Vascular endothelial growth factor receptor-2 signaling pathway (1) 0.0 1.0 0.392320
487 GO:0030325 0.087339 Adrenal gland development (1) 0.0 1.0 0.389651
781 GO:0008625 0.179216 Extrinsic apoptotic signaling pathway via death domain receptors (1) 1.0 1.0 0.375455
608 GO:0043114 0.067682 Regulation of vascular permeability (1) 1.0 1.0 0.368329
103 GO:0042531 0.243583 Positive regulation of tyrosine phosphorylation of stat protein (1) 0.0 1.0 0.365635
102 GO:0050731 0.126350 Positive regulation of peptidyl-tyrosine phosphorylation (1) 2.0 1.0 0.364186
504 GO:0048709 0.176248 Oligodendrocyte differentiation (1) 2.0 1.0 0.355145
324 GO:0016241 0.118988 Regulation of macroautophagy (1) 2.0 1.0 0.349328
218 GO:0006468 0.771378 Protein phosphorylation (1) 5.0 1.0 0.348022
171 GO:0003014 0.201637 Renal system process (1) 2.0 1.0 0.341557
644 GO:0071230 0.284292 Cellular response to amino acid stimulus (1) 1.0 1.0 0.332289
321 GO:0010507 0.205494 Negative regulation of autophagy (1) 1.0 1.0 0.332037
533 GO:0030168 0.219923 Platelet activation (1) 2.0 1.0 0.322598
546 GO:0008104 0.599923 Protein localization (1) 5.0 1.0 0.318133
362 GO:0033043 0.644550 Regulation of organelle organization (1) 4.0 1.0 0.315090
851 GO:0070933 0.058559 Histone h4 deacetylation (1) 0.0 1.0 0.313671
460 GO:0023061 0.159325 Signal release (1) 4.0 1.0 0.312808
438 GO:0007179 0.169016 Transforming growth factor beta receptor signaling pathway (1) 1.0 1.0 0.304505
300 GO:0032386 0.384834 Regulation of intracellular transport (1) 2.0 1.0 0.296704
920 GO:0036092 0.063107 Phosphatidylinositol-3-phosphate biosynthetic process (1) 0.0 1.0 0.293434
505 GO:0007283 0.290337 Spermatogenesis (1) 2.0 1.0 0.287750
357 GO:0006997 0.062583 Nucleus organization (1) 2.0 1.0 0.284535
748 GO:0009056 0.627534 Catabolic process (1) 5.0 1.0 0.276960
48 GO:0001525 0.386728 Angiogenesis (1) 2.0 1.0 0.264850
541 GO:0008542 0.131766 Visual learning (1) 0.0 1.0 0.264386
98 GO:0001932 0.669289 Regulation of protein phosphorylation (1) 4.0 1.0 0.263137
734 GO:0051702 0.152779 Biological process involved in interaction with symbiont (1) 2.0 1.0 0.263093
243 GO:0006606 0.250144 Protein import into nucleus (1) 1.0 1.0 0.261493
513 GO:0030900 0.289030 Forebrain development (1) 3.0 1.0 0.257354
673 GO:0043392 0.109472 Negative regulation of dna binding (1) 1.0 1.0 0.255228
346 GO:0006936 0.187431 Muscle contraction (1) 3.0 1.0 0.250863
552 GO:0033365 0.343027 Protein localization to organelle (1) 3.0 1.0 0.239223
9 GO:0043408 0.476159 Regulation of mapk cascade (1) 2.0 1.0 0.238056
887 GO:0034504 0.294393 Protein localization to nucleus (1) 2.0 1.0 0.236099
598 GO:0043254 0.240098 Regulation of protein-containing complex assembly (1) 3.0 1.0 0.234151
280 GO:0006810 0.773180 Transport (1) 7.0 1.0 0.229528
267 GO:0045732 0.069485 Positive regulation of protein catabolic process (1) 2.0 1.0 0.220326
668 GO:0010976 0.084952 Positive regulation of neuron projection development (1) 1.0 1.0 0.216502
212 GO:0051090 0.131395 Regulation of dna-binding transcription factor activity (1) 2.0 1.0 0.215454
799 GO:0009743 0.121060 Response to carbohydrate (1) 2.0 1.0 0.214795
675 GO:0071277 0.074147 Cellular response to calcium ion (1) 0.0 1.0 0.211553
495 GO:0060976 0.094845 Coronary vasculature development (1) 1.0 1.0 0.210353
803 GO:0042475 0.125550 Odontogenesis of dentin-containing tooth (1) 2.0 1.0 0.208932
671 GO:0032092 0.120001 Positive regulation of protein binding (1) 1.0 1.0 0.204806
772 GO:0060485 0.320964 Mesenchyme development (1) 3.0 1.0 0.203758
121 GO:0090050 0.180206 Positive regulation of cell migration involved in sprouting angiogenesis (1) 0.0 1.0 0.202697
801 GO:0033143 0.078487 Regulation of intracellular steroid hormone receptor signaling pathway (1) 1.0 1.0 0.199503
870 GO:0070527 0.179383 Platelet aggregation (1) 1.0 1.0 0.197119
314 GO:0070588 0.238228 Calcium ion transmembrane transport (1) 4.0 1.0 0.196919
656 GO:0030154 0.829241 Cell differentiation (1) 6.0 1.0 0.193962
645 GO:0071300 0.128940 Cellular response to retinoic acid (1) 0.0 1.0 0.193772
897 GO:0035767 0.138830 Endothelial cell chemotaxis (1) 1.0 1.0 0.192330
332 GO:0097190 0.410188 Apoptotic signaling pathway (1) 4.0 1.0 0.189603
643 GO:0071222 0.143456 Cellular response to lipopolysaccharide (1) 2.0 1.0 0.189188
898 GO:0035924 0.211788 Cellular response to vascular endothelial growth factor stimulus (1) 2.0 1.0 0.184593
459 GO:0007267 0.506882 Cell-cell signaling (1) 5.0 1.0 0.184371
323 GO:0016236 0.245469 Macroautophagy (1) 3.0 1.0 0.183750
317 GO:0006897 0.277768 Endocytosis (1) 3.0 1.0 0.178141
244 GO:0042307 0.225164 Positive regulation of protein import into nucleus (1) 0.0 1.0 0.177873
728 GO:0030032 0.140303 Lamellipodium assembly (1) 1.0 1.0 0.163285
640 GO:0071310 0.661841 Cellular response to organic substance (1) 4.0 1.0 0.156044
686 GO:0097009 0.055448 Energy homeostasis (1) 0.0 1.0 0.155920
692 GO:0010595 0.235403 Positive regulation of endothelial cell migration (1) 2.0 1.0 0.154755
309 GO:0034765 0.277608 Regulation of ion transmembrane transport (1) 4.0 1.0 0.153881
770 GO:0009887 0.516411 Animal organ morphogenesis (1) 4.0 1.0 0.152678
209 GO:0006338 0.181855 Chromatin remodeling (1) 2.0 1.0 0.147943
547 GO:0032880 0.479479 Regulation of protein localization (1) 4.0 1.0 0.144928
918 GO:0046854 0.054745 Phosphatidylinositol phosphate biosynthetic process (1) 1.0 1.0 0.142413
329 GO:0043066 0.610305 Negative regulation of apoptotic process (1) 4.0 1.0 0.132954
824 GO:0071363 0.407544 Cellular response to growth factor stimulus (1) 3.0 1.0 0.122808
603 GO:0061045 0.068278 Negative regulation of wound healing (1) 2.0 1.0 0.122775
225 GO:0006470 0.305848 Protein dephosphorylation (1) 3.0 1.0 0.111956
388 GO:0051726 0.483775 Regulation of cell cycle (1) 5.0 1.0 0.109432
516 GO:0007423 0.372363 Sensory organ development (1) 3.0 1.0 0.106514
261 GO:0051171 0.836045 Regulation of nitrogen compound metabolic process (1) 6.0 1.0 0.100916
375 GO:0051496 0.076120 Positive regulation of stress fiber assembly (1) 0.0 1.0 0.097110
758 GO:0031099 0.247922 Regeneration (1) 2.0 1.0 0.096133
55 GO:0001649 0.136472 Osteoblast differentiation (1) 1.0 1.0 0.091104
227 GO:0032516 0.155024 Positive regulation of phosphoprotein phosphatase activity (1) 0.0 1.0 0.090968
191 GO:0045944 0.420524 Positive regulation of transcription by rna polymerase ii (1) 2.0 1.0 0.084844
283 GO:0051049 0.611523 Regulation of transport (1) 5.0 1.0 0.084359
739 GO:1902903 0.245529 Regulation of supramolecular fiber organization (1) 3.0 1.0 0.083258
569 GO:0030307 0.190101 Positive regulation of cell growth (1) 2.0 1.0 0.078819
679 GO:0042310 0.060677 Vasoconstriction (1) 1.0 1.0 0.075349
405 GO:0007159 0.162103 Leukocyte cell-cell adhesion (1) 3.0 1.0 0.074992
566 GO:0032879 0.660318 Regulation of localization (1) 6.0 1.0 0.074219
568 GO:0008284 0.513916 Positive regulation of cell population proliferation (1) 2.0 1.0 0.073069
561 GO:0048511 0.251652 Rhythmic process (1) 3.0 1.0 0.067632
702 GO:0048146 0.179210 Positive regulation of fibroblast proliferation (1) 0.0 1.0 0.067004
403 GO:0033628 0.059919 Regulation of cell adhesion mediated by integrin (1) 1.0 1.0 0.061975
885 GO:1900180 0.205300 Regulation of protein localization to nucleus (1) 1.0 1.0 0.060202
228 GO:0006508 0.347532 Proteolysis (1) 4.0 1.0 0.056727
560 GO:0043473 0.188137 Pigmentation (1) 2.0 1.0 0.036612
777 GO:0050680 0.103322 Negative regulation of epithelial cell proliferation (1) 2.0 1.0 0.036383
751 GO:0046034 0.094236 Atp metabolic process (1) 2.0 1.0 0.034832
591 GO:0010941 0.705559 Regulation of cell death (1) 5.0 1.0 0.026616
818 GO:0010243 0.557617 Response to organonitrogen compound (1) 4.0 1.0 0.022040
724 GO:0036473 0.154959 Cell death in response to oxidative stress (1) 2.0 1.0 0.021429
153 GO:0050776 0.424435 Regulation of immune response (1) 4.0 1.0 0.011047
521 GO:0035051 0.187551 Cardiocyte differentiation (1) 2.0 1.0 0.003905
194 GO:0006355 0.544373 Regulation of transcription, dna-templated (1) 4.0 1.0 0.003607
622 GO:0043086 0.212387 Negative regulation of catalytic activity (1) 3.0 1.0 -0.003920
494 GO:0060840 0.094489 Artery development (1) 2.0 1.0 -0.013518
936 GO:0051000 0.059868 Positive regulation of nitric-oxide synthase activity (1) 0.0 1.0 -0.017649
382 GO:0030041 0.093571 Actin filament polymerization (1) 2.0 1.0 -0.024295
712 GO:0033002 0.280632 Muscle cell proliferation (1) 2.0 1.0 -0.031117
750 GO:0044281 0.305872 Small molecule metabolic process (1) 5.0 1.0 -0.053702
257 GO:0046488 0.135113 Phosphatidylinositol metabolic process (1) 2.0 1.0 -0.069795
731 GO:0031529 0.036028 Ruffle organization (1) 1.0 1.0 -0.085835
469 GO:0060173 0.072132 Limb development (1) 1.0 1.0 -0.088184
296 GO:0015031 0.292221 Protein transport (1) 4.0 1.0 -0.098692
927 GO:0042632 0.034870 Cholesterol homeostasis (1) 0.0 1.0 -0.119705
78 GO:0032760 0.100402 Positive regulation of tumor necrosis factor production (1) 0.0 1.0 -0.132748
856 GO:0019722 0.034146 Calcium-mediated signaling (1) 2.0 1.0 -0.141431
288 GO:0032940 0.217398 Secretion by cell (1) 5.0 1.0 -0.147194
705 GO:0051353 0.082787 Positive regulation of oxidoreductase activity (1) 1.0 1.0 -0.158575
176 GO:0044262 0.129234 Cellular carbohydrate metabolic process (1) 3.0 1.0 -0.190099
180 GO:0019318 0.046251 Hexose metabolic process (1) 2.0 1.0 -0.211363
440 GO:0030512 0.045444 Negative regulation of transforming growth factor beta receptor signaling pathway (1) 0.0 1.0 -0.229806
331 GO:0071887 0.072964 Leukocyte apoptotic process (1) 2.0 1.0 -0.295526
823 GO:0045471 0.068117 Response to ethanol (1) 1.0 1.0 -0.639920
232 GO:0010951 0.025246 Negative regulation of endopeptidase activity (1) 2.0 1.0 -0.917276
Code
# Top 30 GO terms predicted as 1 (layer 7 or below), ranked by decreasing
# delta logit. The filter and the sort are evaluated once and the three
# parallel lists are all read from this single frame, so they cannot drift
# out of sync with each other.
top_terms2 = (
    probabilities_mod.loc[
        (probabilities_mod["predictions"] == 1)
        & (probabilities_mod["layer_number"] <= 7)
    ]
    .sort_values(by=["delta_logits"], ascending=False)
    .head(30)
)

names2 = list(top_terms2["Name"])
terms2 = list(top_terms2["GO_term"])
logits2 = list(top_terms2["delta_logits"])

# Drop the last four characters of every name. In the table printed above each
# name ends with " (1)"; this assumes every name carries a suffix of exactly
# that length.
names2 = [x[:-4] for x in names2]
Code
# One line per selected term: GO identifier, cleaned name, delta logit.
for go_id, go_name, delta in zip(terms2, names2, logits2):
    print(go_id, go_name, delta)
GO:0031047 Gene silencing by rna 2.192457619336144
GO:0031648 Protein destabilization 2.092833916210919
GO:0071900 Regulation of protein serine/threonine kinase activity 1.944569179670069
GO:0000423 Mitophagy 1.8800065969407627
GO:0048266 Behavioral response to pain 1.8570307139212263
GO:0090630 Activation of gtpase activity 1.793943480641404
GO:0045727 Positive regulation of translation 1.7460704310285706
GO:0018108 Peptidyl-tyrosine phosphorylation 1.6728257190053135
GO:0043244 Regulation of protein-containing complex disassembly 1.6460980615310405
GO:0006612 Protein targeting to membrane 1.6268405228374492
GO:0006469 Negative regulation of protein kinase activity 1.5937943848967007
GO:0034976 Response to endoplasmic reticulum stress 1.575980420814227
GO:0071353 Cellular response to interleukin-4 1.5752555355925209
GO:0033554 Cellular response to stress 1.573975479674923
GO:0033993 Response to lipid 1.5515734212063519
GO:0001817 Regulation of cytokine production 1.5347154822720621
GO:0007026 Negative regulation of microtubule depolymerization 1.5337813437727923
GO:0007565 Female pregnancy 1.4788810435976616
GO:0046777 Protein autophosphorylation 1.453821209026271
GO:0060179 Male mating behavior 1.4409006464912832
GO:0000422 Autophagy of mitochondrion 1.4098807478143418
GO:0030216 Keratinocyte differentiation 1.406529668352229
GO:0048812 Neuron projection morphogenesis 1.3850039160074457
GO:0001553 Luteinization 1.3733255324441331
GO:0002718 Regulation of cytokine production involved in immune response 1.3728274508963876
GO:0042060 Wound healing 1.325910525701464
GO:0060632 Regulation of microtubule-based movement 1.3061039503079361
GO:0065003 Protein-containing complex assembly 1.304383769510548
GO:0060020 Bergmann glial cell differentiation 1.2886961413918279
GO:1902459 Positive regulation of stem cell population maintenance 1.2787216247479065
Code
# Horizontal "lollipop" chart (a faded bar from 0 plus a dot at the value) of
# the delta-logit values in logits2, labelled with the GO-term names in names2,
# saved as a PNG under resultsdir.
# Reads names2, logits2, selected_drug_u_name and resultsdir, which are not
# defined in this cell.

# libraries used by this cell
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

# set font
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = 'Roboto'

# set the style of the axes and the text color
plt.rcParams['axes.edgecolor']='#333F4B'
plt.rcParams['axes.linewidth']=0.8
plt.rcParams['xtick.color']='#333F4B'
plt.rcParams['ytick.color']='#333F4B'
plt.rcParams['text.color']='#333F4B'


# build the plotting table: one row per GO-term name holding its delta logit
# (the column is called 'percentage' but the values come from logits2)
percentages = pd.Series(logits2, 
                        index=names2)
df = pd.DataFrame({'percentage' : percentages})
# ascending sort, so the largest value is drawn last, i.e. at the top of the y axis
df = df.sort_values(by='percentage')

# we first need a numeric placeholder for the y axis
my_range=list(range(1,len(df.index)+1))

fig, ax = plt.subplots(figsize=(4,17))

# for each GO term draw a horizontal line that starts at x = 0 and ends at
# the term's delta-logit value
plt.hlines(y=my_range, xmin=0, xmax=df['percentage'], color='#208EA3', alpha=0.2, linewidth=14)

# for each GO term draw a dot at its delta-logit value
plt.plot(df['percentage'], my_range, "o", markersize=14, color='#208EA3', alpha=0.8)

# set labels
ax.set_xlabel(' Δlogit', fontsize=25, fontweight='black', color = '#36382E')
ax.set_ylabel('')
ax.set_facecolor(color="white")
ax.set_alpha(1)

# set axis: one tick per row, labelled with the GO-term name
ax.tick_params(axis='both', which='major', labelsize=30)
plt.yticks(my_range, df.index)

# add a horizontal title for the y axis and the drug name as the figure title;
# both positions are hard-coded (the negative x presumably places the first
# text to the left of the axes, over the long tick labels -- verify visually)
fig.text(-0.58, 0.862, 'MoA (GO terms)', fontsize=27, fontweight='black', color = '#36382E')
fig.text(0.2, 0.9, selected_drug_u_name.capitalize(), fontsize=30, fontweight='black', color = '#36382E')


# change the style of the axis spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# the left spine only spans the plotted rows
ax.spines['left'].set_bounds((1, len(my_range)))
# NOTE: the x axis starts at 0, so any negative delta logit would fall outside the visible range
ax.set_xlim(0,max(logits2)+0.1)

ax.spines['left'].set_position(('outward', 8))
ax.spines['bottom'].set_position(('outward', 5))

# the output path is built by plain string concatenation of resultsdir and the drug name
plt.savefig(resultsdir+selected_drug_u_name+'_top_terms.png', dpi=300, bbox_inches='tight')

For known drug…

Code
# Show the drug selector widget (`combobox` is created outside this section).
display(combobox)
Code
# Read the current choice from the selector; it is used below as a column
# label of `platt_matrix` and `slim_matrix_single_neuron`.
selected_drug_name = combobox.result
Code
# NOTE: these are the TEST values (original note: "the TEST logits!").
# Take the selected drug's column of platt_matrix, expose its row labels as a
# "GO_term" column, and attach the GO-term metadata from real_go_info_svm.
drug_platt_column = platt_matrix.loc[:, selected_drug_name]
train_drug_logs = pd.DataFrame(drug_platt_column).reset_index()
train_drug_logs.columns = ["GO_term", "probability"]
train_drug_logs = train_drug_logs.merge(real_go_info_svm, on="GO_term")
Code
# 30 highest-probability GO terms among those with layer_number <= 3.
(
    train_drug_logs.loc[train_drug_logs["layer_number"] <= 3]
    .sort_values(by=["probability"], ascending=False)
    .head(30)
)
GO_term probability Name layer_number
423 GO:1902533 0.825532 Positive regulation of intracellular signal transduction (1) 2.0
99 GO:0001934 0.823688 Positive regulation of protein phosphorylation (1) 3.0
633 GO:0051301 0.817491 Cell division (1) 2.0
224 GO:0046777 0.780375 Protein autophosphorylation (1) 1.0
8 GO:0000165 0.778813 Mapk cascade (1) 3.0
624 GO:0010628 0.741437 Positive regulation of gene expression (1) 3.0
253 GO:0043552 0.735601 Positive regulation of phosphatidylinositol 3-kinase activity (1) 0.0
437 GO:0007169 0.646597 Transmembrane receptor protein tyrosine kinase signaling pathway (1) 3.0
653 GO:0072593 0.631785 Reactive oxygen species metabolic process (1) 3.0
894 GO:0048017 0.609803 Inositol lipid-mediated signaling (1) 1.0
578 GO:2000379 0.603184 Positive regulation of reactive oxygen species metabolic process (1) 1.0
24 GO:0007346 0.596523 Regulation of mitotic cell cycle (1) 3.0
573 GO:0010629 0.592396 Negative regulation of gene expression (1) 3.0
908 GO:0051899 0.591286 Membrane depolarization (1) 2.0
906 GO:0043549 0.586633 Regulation of kinase activity (1) 3.0
105 GO:0045860 0.585544 Positive regulation of protein kinase activity (1) 2.0
12 GO:0070374 0.584467 Positive regulation of erk1 and erk2 cascade (1) 0.0
74 GO:0001817 0.527795 Regulation of cytokine production (1) 3.0
44 GO:0048812 0.526118 Neuron projection morphogenesis (1) 3.0
853 GO:0038083 0.500000 Peptidyl-tyrosine autophosphorylation (1) 0.0
223 GO:0018108 0.493038 Peptidyl-tyrosine phosphorylation (1) 3.0
9 GO:0043408 0.472479 Regulation of mapk cascade (1) 2.0
570 GO:0045597 0.464440 Positive regulation of cell differentiation (1) 3.0
333 GO:1904019 0.457451 Epithelial cell apoptotic process (1) 1.0
702 GO:0048146 0.440487 Positive regulation of fibroblast proliferation (1) 0.0
80 GO:0010575 0.438910 Positive regulation of vascular endothelial growth factor production (1) 0.0
106 GO:0071900 0.437003 Regulation of protein serine/threonine kinase activity (1) 2.0
141 GO:0050900 0.435245 Leukocyte migration (1) 3.0
10 GO:0051403 0.429898 Stress-activated mapk cascade (1) 2.0
11 GO:0043406 0.426305 Positive regulation of map kinase activity (1) 1.0
Code
# For known drugs: how many of the 30 highest-probability GO terms
# (layer_number <= 3) also appear in column 1 of the drug's entry in
# compounds_GOterms_matches.
len(
    set(
        train_drug_logs.loc[train_drug_logs["layer_number"] <= 3]
        .sort_values(by=["probability"], ascending=False)
        .head(30)["GO_term"]
    )
    & set(pd.DataFrame(compounds_GOterms_matches[selected_drug_name])[1])
)
30
Code
# Box plot of the "probability" values, grouped by the selected drug's entry in
# slim_matrix_single_neuron for each GO term.
# NOTE(review): x and y are passed as vectors, so `data=plot` looks redundant —
# confirm that `plot` is defined and actually needed here.
ax = sns.boxplot(x=slim_matrix_single_neuron.loc[train_drug_logs["GO_term"],selected_drug_name], y=train_drug_logs.set_index("GO_term")["probability"], data=plot,showfliers=True )

Code
# Same computation as earlier in the notebook: per-row (GO term) sum of
# slim_matrix_single_neuron divided by the number of columns (drugs), then
# converted to log-odds.
n_columns = slim_matrix_single_neuron.shape[1]
sum_annotations = slim_matrix_single_neuron.sum(axis=1) / n_columns
logits_apriori = np.log(sum_annotations / (1 - sum_annotations))
Code
# Δlogit per GO term for the selected drug: log-odds of the "probability"
# column minus the a priori log-odds of the same GO term.
posterior = train_drug_logs["probability"]
logits_apost = np.log(posterior / (1 - posterior))

# Look the prior up by GO term instead of by row position: train_drug_logs
# comes out of a merge, so its row order/count is not guaranteed to match the
# rows of slim_matrix_single_neuron that logits_apriori was computed from.
# A GO term missing from logits_apriori raises a KeyError instead of silently
# pairing the wrong rows.
prior_for_terms = logits_apriori.loc[train_drug_logs["GO_term"]]
delta_logits = logits_apost.to_numpy() - prior_for_terms.to_numpy()

# Keep the result addressable by train_drug_logs' own index so the index-based
# merge below attaches each value to the row it was computed from.
delta_logits_df = pd.DataFrame({"delta_logits": delta_logits}, index=train_drug_logs.index)
train_drug_mod = train_drug_logs.merge(delta_logits_df, left_index=True, right_index=True)
Code
# 30 GO terms with the largest delta_logits among those with layer_number <= 3.
(
    train_drug_mod.loc[train_drug_mod["layer_number"] <= 3]
    .sort_values(by=["delta_logits"], ascending=False)
    .head(30)
)
GO_term probability Name layer_number delta_logits
578 GO:2000379 0.603184 Positive regulation of reactive oxygen species metabolic process (1) 1.0 2.770126
253 GO:0043552 0.735601 Positive regulation of phosphatidylinositol 3-kinase activity (1) 0.0 2.707570
80 GO:0010575 0.438910 Positive regulation of vascular endothelial growth factor production (1) 0.0 2.282492
633 GO:0051301 0.817491 Cell division (1) 2.0 2.245231
224 GO:0046777 0.780375 Protein autophosphorylation (1) 1.0 2.053782
423 GO:1902533 0.825532 Positive regulation of intracellular signal transduction (1) 2.0 2.014408
458 GO:0035025 0.374269 Positive regulation of rho protein signal transduction (1) 0.0 1.952270
848 GO:0071670 0.345148 Smooth muscle cell chemotaxis (1) 0.0 1.887644
348 GO:0006939 0.412178 Smooth muscle contraction (1) 2.0 1.842256
350 GO:0045987 0.348868 Positive regulation of smooth muscle contraction (1) 1.0 1.783402
908 GO:0051899 0.591286 Membrane depolarization (1) 2.0 1.675960
653 GO:0072593 0.631785 Reactive oxygen species metabolic process (1) 3.0 1.650121
115 GO:0060312 0.275354 Regulation of blood vessel remodeling (1) 0.0 1.625762
926 GO:0060020 0.369360 Bergmann glial cell differentiation (1) 0.0 1.614859
923 GO:0048170 0.295742 Positive regulation of long-term neuronal synaptic plasticity (1) 0.0 1.598558
99 GO:0001934 0.823688 Positive regulation of protein phosphorylation (1) 3.0 1.593723
853 GO:0038083 0.500000 Peptidyl-tyrosine autophosphorylation (1) 0.0 1.588712
713 GO:0035726 0.210551 Common myeloid progenitor cell proliferation (1) 0.0 1.577983
445 GO:0048008 0.414654 Platelet-derived growth factor receptor signaling pathway (1) 1.0 1.552362
857 GO:0035584 0.298388 Calcium-mediated signaling using intracellular calcium source (1) 0.0 1.496390
333 GO:1904019 0.457451 Epithelial cell apoptotic process (1) 1.0 1.481165
933 GO:0051150 0.337253 Regulation of smooth muscle cell differentiation (1) 1.0 1.474264
352 GO:0014827 0.254529 Intestine smooth muscle contraction (1) 0.0 1.453480
814 GO:0090037 0.235572 Positive regulation of protein kinase c signaling (1) 0.0 1.416277
894 GO:0048017 0.609803 Inositol lipid-mediated signaling (1) 1.0 1.399588
506 GO:0007286 0.240113 Spermatid development (1) 1.0 1.376021
742 GO:0035733 0.239304 Hepatic stellate cell activation (1) 0.0 1.371582
10 GO:0051403 0.429898 Stress-activated mapk cascade (1) 2.0 1.369506
702 GO:0048146 0.440487 Positive regulation of fibroblast proliferation (1) 0.0 1.349525
782 GO:1902042 0.289337 Negative regulation of extrinsic apoptotic signaling pathway via death domain receptors (1) 0.0 1.347889
Code
# Box plot of the "delta_logits" values, grouped by the selected drug's entry
# in slim_matrix_single_neuron for each GO term.
# NOTE(review): x and y are passed as vectors, so `data=plot` looks redundant —
# confirm that `plot` is defined and actually needed here.
ax = sns.boxplot(x=slim_matrix_single_neuron.loc[train_drug_mod["GO_term"],selected_drug_name], y=train_drug_mod.set_index("GO_term")["delta_logits"], data=plot,showfliers=True)

SVM GO TERM 2D representation

Code
from sklearn.manifold import TSNE
import plotly.express as px

Choose go to study…

Code
# Show the GO-term selector widget (`combobox_go` is created outside this section).
display(combobox_go)
Code
# Read the chosen GO term; it is used below as a key of `models_svm` and as a
# row label of `slim_matrix_single_neuron`.
selected_goterm = combobox_go.result
Code
# Show the real_go_info row whose GO_term is "<selected term>_1".
real_go_info[real_go_info["GO_term"]==selected_goterm+"_1"]
GO_term Name layer_number
4338 GO:0071353_1 Cellular response to interleukin-4 (1) 1.0
Code
# Row labels "<GO term>_1" ... "<GO term>_6" of the attribution table.
list_nodes = [selected_goterm + "_" + str(suffix) for suffix in range(1, 7)]

# One row per drug, one column per node after the transpose.
score = attribution_data_annotated.loc[list_nodes].T
# Scale every column by its standard deviation; NaN results are replaced by 0.
score_std = score.std()
score_mod = (score / score_std).fillna(0)
annotations = slim_matrix_single_neuron.loc[selected_goterm,]
svm_inputs = score_mod.astype(float)
y_predicted = models_svm[selected_goterm].predict(svm_inputs)

Plot SVM

View statistics of GOterm

“Perfect” model (with train data)

Code
# Statistics of the selected GO-term SVM on `score_mod` against `annotations`.
svm_for_term = models_svm[selected_goterm]
decision_values = svm_for_term.decision_function(score_mod.astype(float))
auc = metrics.roc_auc_score(annotations, decision_values)
cnf_matrix = metrics.confusion_matrix(annotations, y_predicted)
print(cnf_matrix)

# Precision = TP / (TP + FP); recall = TP / (TP + FN).
for caption, metric in (
    ("Accuracy:", metrics.accuracy_score),
    ("Precision:", metrics.precision_score),
    ("Recall:", metrics.recall_score),
):
    print(caption, metric(annotations, y_predicted))
print("AUC with score:", auc)
[[206   6]
 [  2  16]]
Accuracy: 0.9652173913043478
Precision: 0.7272727272727273
Recall: 0.8888888888888888
AUC with score: 0.9855870020964361

Layout of the confusion matrix printed above (rows: true label, columns: predicted label):

TN - FP

FN - TP

In my opinion precision is what matters most here: I would rather have fewer false positives, right?

Test statistics…

Code
# Same statistics as above, computed from the stored matrices: labels from
# slim_matrix_single_neuron, class predictions from preds_svm_matrix and the
# AUC from the platt_matrix row of the selected GO term.
true_labels = slim_matrix_single_neuron.loc[selected_goterm]
stored_predictions = preds_svm_matrix.loc[selected_goterm]
auc = metrics.roc_auc_score(true_labels, platt_matrix.loc[selected_goterm])
cnf_matrix = metrics.confusion_matrix(true_labels, stored_predictions)
print(cnf_matrix)

print("Accuracy:", metrics.accuracy_score(true_labels, stored_predictions))
print("Precision:", metrics.precision_score(true_labels, stored_predictions))  # TP / (TP+FP)
print("Recall:", metrics.recall_score(true_labels, stored_predictions))  # TP / (TP+FN)
print("AUC with score:", auc)
[[203   9]
 [  4  14]]
Accuracy: 0.9434782608695652
Precision: 0.6086956521739131
Recall: 0.7777777777777778
AUC with score: 0.9095911949685536
Code
import colorlover as cl

# Confusion-matrix counts of `y_predicted` against `annotations`, drawn as a
# four-slice pie in the order TP, FN, FP, TN.
matrix = metrics.confusion_matrix(annotations, y_predicted)
tn, fp, fn, tp = matrix.ravel()

values = [tp, fn, fp, tn]
label_text = ["True Positive", "False Negative", "False Positive", "True Negative"]
labels = [f"<b>{abbreviation}</b>" for abbreviation in ("TP", "FN", "FP", "TN")]
# colorlover palettes; the slice colours actually used are listed in `colors`.
blue = cl.flipper()["seq"]["9"]["Blues"]
red = cl.flipper()["seq"]["9"]["Reds"]
colors = ["#ff3700", "#FFA0A0", "#CCE9FF", "#0b8bff"]

trace0 = go.Pie(
    labels=label_text,
    values=values,
    text=labels,
    textinfo="text+value",
    hoverinfo="label+value+percent",
    marker={"colors": colors},
    insidetextfont=dict(color="#36382E"),
    sort=False,  # keep the TP, FN, FP, TN order instead of sorting by size
    rotation=90,
)

pie_title = dict(
    text="Confusion Matrix",
    x=0.3,
    y=0.8,
    font=dict(size=14),
    xanchor='center',
    yanchor='top',
)
layout = go.Layout(
    title=pie_title,
    legend=dict(font={"color": "#36382E"}, orientation="h", x=0.1, y=-0.03),
    font=dict(family='Roboto', color="#36382E"),
)

data = [trace0]
figure = go.Figure(data=data, layout=layout)
figure
Code
# ROC curve of the selected GO-term SVM on `score_mod` against `annotations`.
y_test = annotations
# Use the SVM's continuous decision values as the ranking score. Passing the
# hard class predictions (`y_predicted`) gives roc_curve a single usable
# threshold, so the curve collapses to three points and the AUC no longer
# matches the "AUC with score" value computed from decision_function above.
decision_test = models_svm[selected_goterm].decision_function(score_mod.astype(float))
fpr, tpr, threshold = metrics.roc_curve(y_test, decision_test)

# AUC Score
auc_score = metrics.roc_auc_score(y_true=y_test, y_score=decision_test)

# NOTE(review): the trace is labelled "Test Data" although the inputs are the
# same ones used under the "with train data" heading — confirm the label.
trace0 = go.Scatter(
    x=fpr, y=tpr, mode="lines", name="Test Data", marker={"color": "#ff3700"}
)

layout = go.Layout(
    title=dict(text=f"ROC Curve (AUC = {auc_score:.3f})",
            x=0.6,
            y=0.5,
            font=dict(size=20)
              ),
    xaxis=dict(title="False Positive Rate", gridcolor="white"),
    yaxis=dict(title="True Positive Rate", gridcolor="white"),
    legend=dict(x=0, y=1.05, orientation="h"),
    margin=dict(l=100, r=10, t=25, b=40),
    font=dict(family='Roboto',color= "#36382E"),
)

data = [trace0]
figure = go.Figure(data=data, layout=layout)
figure

Plot SVM with unknown labels

Voronoi Tessellation

What is a Voronoi Tessellation? Given a set P := {p1, …, pn} of sites, a Voronoi Tessellation is a subdivision of the space into n cells, one for each site in P, with the property that a point q lies in the cell corresponding to a site pi iff d(pi, q) < d(pj, q) for every j distinct from i. The boundaries between cells consist of the points whose two nearest sites are equidistant. Voronoi Tessellations have applications in computer science.

https://stackoverflow.com/questions/61225052/svm-plot-decision-surface-when-working-with-more-than-2-features

Code
# Two-component t-SNE with PCA initialisation and a fixed seed, fitted on the
# normalised scores in `score_mod`.
tsne_settings = dict(
    n_components=2,
    verbose=0,
    init="pca",
    perplexity=30,
    random_state=123,
)
tsne = TSNE(**tsne_settings)
z = tsne.fit_transform(score_mod.astype(float))
C:\Users\ksada\Anaconda3\envs\SparseGO\lib\site-packages\sklearn\manifold\_t_sne.py:790: FutureWarning:

The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.

C:\Users\ksada\Anaconda3\envs\SparseGO\lib\site-packages\sklearn\manifold\_t_sne.py:982: FutureWarning:

The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.
Code
# The feature names stored in the fitted model are the attribution rows needed.
list_nodes = [*models_svm[selected_goterm].feature_names_in_]
# Attributions of the `unknown` columns, transposed so the features are columns.
score_unknown = attribution_data_all.loc[list_nodes, unknown].T
# Normalise with the per-column standard deviation of `score`; NaN becomes 0.
reference_std = score.std()
score_unknown_mod = (score_unknown / reference_std).fillna(0)
Code
# Label every row of score_unknown_mod with 2 (2 = unknown MOA) and get the
# SVM's class prediction for each of them.
n_unknown = len(score_unknown_mod)
y_unknown = np.full(n_unknown, 2)
unknown_inputs = score_unknown_mod.astype(float)
y_pred_unknown = models_svm[selected_goterm].predict(unknown_inputs)
Code
# Stack known and unknown drugs: first the scores, then the matching labels
# (annotations for the known drugs, 2 = unknown MOA for the rest).
all_score = pd.concat([score_mod, score_unknown_mod], axis=0)
all_y = np.concatenate([annotations, y_unknown])

Plot T-SNE SVM

Code
# Import from the public package path; `sklearn.neighbors._classification` is a
# private module whose location may change between scikit-learn releases.
from sklearn.neighbors import KNeighborsClassifier
# Plot layout adapted from
# https://github.com/plotly/dash-sample-apps/blob/main/apps/dash-svm/utils/dash_reusable_components.py

# 2-D embedding of all drugs (known + unknown) and a frame holding, per drug,
# its label, its two embedding coordinates and its name.
z = tsne.fit_transform(all_score.astype(float)) 
df = pd.DataFrame()
df["y"] = all_y
df["comp-1"] = z[:,0]
df["comp-2"] = z[:,1]
df["name"] =list(all_score.index)
df = df.sort_values(by=['y'])
df["y"] = df["y"].astype(str)
X,y = all_score.astype(float), all_y
# NOTE: this overwrites the `y_predicted` of the earlier cells with predictions
# for known AND unknown drugs.
y_predicted = models_svm[selected_goterm].predict(X)

resolution = 300 # background grid is resolution x resolution points
# Grid covering the embedding with a margin of 1 on every side.
X2d_xmin, X2d_xmax = np.min(z[:,0])-1, np.max(z[:,0])+1
X2d_ymin, X2d_ymax = np.min(z[:,1])-1, np.max(z[:,1])+1
xx, yy = np.meshgrid(np.linspace(X2d_xmin, X2d_xmax, resolution), np.linspace(X2d_ymin, X2d_ymax, resolution))

# approximate Voronoi tessellation on a resolution x resolution grid using 1-NN:
# every grid point takes the SVM prediction of its nearest embedded drug
background_model = KNeighborsClassifier(n_neighbors=1).fit(z, y_predicted) 
voronoiBackground = background_model.predict(np.c_[xx.ravel(), yy.ravel()])
voronoiBackground = voronoiBackground.reshape((resolution, resolution))
C:\Users\ksada\Anaconda3\envs\SparseGO\lib\site-packages\sklearn\manifold\_t_sne.py:790: FutureWarning:

The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.

C:\Users\ksada\Anaconda3\envs\SparseGO\lib\site-packages\sklearn\manifold\_t_sne.py:982: FutureWarning:

The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.

C:\Users\ksada\Anaconda3\envs\SparseGO\lib\site-packages\sklearn\neighbors\_classification.py:228: FutureWarning:

Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
Code
# Name of the selected GO term: take the "<term>_1" row of real_go_info and
# drop the last four characters of its Name (the trailing " (1)").
first_node_rows = real_go_info["GO_term"] == selected_goterm + "_1"
go_name = real_go_info[first_node_rows]["Name"].values[0][:-4]
go_name
'Cellular response to interleukin-4'
Code
def _legend_marker(label, color):
    """Return an empty scatter trace that only contributes a legend entry."""
    return go.Scatter(
        x=[None],
        y=[None],
        mode="markers",
        name=label,
        marker=dict(size=7, color=color, symbol='circle'),
    )


# Colour scales: points (label 0 / 1 / 2) and background regions.
bright_cscale = [[0, "#0b8bff"], [0.5, "#ff3700"], [1, "#36382E"]]
new_cscale = [[0, "#CCE9FF"], [1, "#FFA0A0"]]

# Flattened background grid shared by both contour traces.
grid_x = xx.flatten()
grid_y = yy.flatten()
grid_z = voronoiBackground.flatten()

# Filled background regions, drawn without contour lines.
trace0 = go.Contour(
    x=grid_x,
    y=grid_y,
    z=grid_z,
    colorscale=new_cscale,
    contours=dict(showlines=False),
    opacity=0.9,
    showscale=False,
    hoverinfo="none",
)

# Same grid again, this time with red contour lines.
trace1 = go.Contour(
    x=grid_x,
    y=grid_y,
    z=grid_z,
    colorscale=new_cscale,
    line=dict(color="#ff3700"),
    showscale=False,
    hoverinfo="none",
)

# One marker per drug, coloured by its label and named on hover.
point_colors = df["y"].to_numpy(int)
trace2 = go.Scatter(
    x=df["comp-1"],
    y=df["comp-2"],
    mode="markers",
    text=df["name"].to_numpy(),
    marker=dict(size=7, color=point_colors, colorscale=bright_cscale),
    showlegend=False,
)

# Legend-only entries, one per label colour.
legend1 = _legend_marker("Not annotated to<br>" + selected_goterm, "#0b8bff")
legend2 = _legend_marker("Drug annotated to<br>" + selected_goterm, "#ff3700")
legend3 = _legend_marker("Unknown MOA<br>annotations", "#36382E")

hidden_axis = dict(ticks="", showticklabels=False, showgrid=False, zeroline=False)
layout = go.Layout(
    title=dict(
        text="<b>" + selected_goterm + "</b> " + go_name,
        x=0.5,
        y=0.92,
        font=dict(size=18),
        xanchor='center',
        yanchor='top',
    ),
    xaxis=dict(hidden_axis),
    yaxis=dict(hidden_axis),
    # Clamp both axes to the extent of the background grid.
    yaxis_range=[yy.min(), yy.max()],
    xaxis_range=[xx.min(), xx.max()],
    legend=dict(x=0, y=0, orientation="h", font=dict(size=14)),
    paper_bgcolor='rgba(0,0,0,0)',
    width=600,
    height=800,
    font=dict(family='Roboto', color="#36382E", size=15),
)
data = [trace0, trace1, trace2, legend2, legend1, legend3]
figure = go.Figure(data=data, layout=layout)

figure